;; Machine description for AArch64 SVE.
;; Copyright (C) 2009-2025 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; The file is organised into the following sections (search for the full
;; line):
;;
;; == General notes
;; ---- Note on the handling of big-endian SVE
;; ---- Description of UNSPEC_PTEST
;; ---- Description of UNSPEC_PRED_Z
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; ---- Note on FFR handling
;;
;; == Moves
;; ---- Moves of single vectors
;; ---- Moves of multiple vectors
;; ---- Moves of predicates
;; ---- Moves of multiple predicates
;; ---- Moves relating to the FFR
;;
;; == Loads
;; ---- Normal contiguous loads
;; ---- Extending contiguous loads
;; ---- First-faulting contiguous loads
;; ---- First-faulting extending contiguous loads
;; ---- Non-temporal contiguous loads
;; ---- Normal gather loads
;; ---- Extending gather loads
;; ---- First-faulting gather loads
;; ---- First-faulting extending gather loads
;;
;; == Prefetches
;; ---- Contiguous prefetches
;; ---- Gather prefetches
;;
;; == Stores
;; ---- Normal contiguous stores
;; ---- Truncating contiguous stores
;; ---- Non-temporal contiguous stores
;; ---- Normal scatter stores
;; ---- Truncating scatter stores
;;
;; == Vector creation
;; ---- [INT,FP] Duplicate element
;; ---- [INT,FP] Initialize from individual elements
;; ---- [INT] Linear series
;; ---- [PRED] Duplicate element
;;
;; == Vector decomposition
;; ---- [INT,FP] Extract index
;; ---- [INT,FP] Extract active element
;; ---- [PRED] Extract index
;;
;; == Unary arithmetic
;; ---- [INT] General unary arithmetic corresponding to rtx codes
;; ---- [INT] General unary arithmetic corresponding to unspecs
;; ---- [INT] Sign and zero extension
;; ---- [INT] Truncation
;; ---- [INT] Logical inverse
;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
;; ---- [FP] General unary arithmetic corresponding to unspecs
;; ---- [FP] Square root
;; ---- [FP] Reciprocal square root
;; ---- [PRED] Inverse
;;
;; == Binary arithmetic
;; ---- [INT] General binary arithmetic corresponding to rtx codes
;; ---- [INT] Addition
;; ---- [INT] Subtraction
;; ---- [INT] Take address
;; ---- [INT] Absolute difference
;; ---- [INT] Saturating addition and subtraction
;; ---- [INT] Highpart multiplication
;; ---- [INT] Division
;; ---- [INT] Binary logical operations
;; ---- [INT] Binary logical operations (inverted second input)
;; ---- [INT] Shifts (rounding towards -Inf)
;; ---- [INT] Shifts (rounding towards 0)
;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; ---- [FP] Addition
;; ---- [FP] Complex addition
;; ---- [FP] Subtraction
;; ---- [FP] Absolute difference
;; ---- [FP] Multiplication
;; ---- [FP] Division
;; ---- [FP] Binary logical operations
;; ---- [FP] Sign copying
;; ---- [FP] Maximum and minimum
;; ---- [PRED] Binary logical operations
;; ---- [PRED] Binary logical operations (inverted second input)
;; ---- [PRED] Binary logical operations (inverted result)
;;
;; == Ternary arithmetic
;; ---- [INT] MLA and MAD
;; ---- [INT] MLS and MSB
;; ---- [INT] Dot product
;; ---- [INT] Sum of absolute differences
;; ---- [INT] Matrix multiply-accumulate
;; ---- [FP] General ternary arithmetic corresponding to unspecs
;; ---- [FP] Complex multiply-add
;; ---- [FP] Trigonometric multiply-add
;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
;; ---- [FP] Matrix multiply-accumulate
;;
;; == Comparisons and selects
;; ---- [INT,FP] Select based on predicates
;; ---- [INT] Comparisons
;; ---- [INT] While tests
;; ---- [FP] Direct comparisons
;; ---- [FP] Absolute comparisons
;; ---- [PRED] Select
;; ---- [PRED] Test bits
;;
;; == Reductions
;; ---- [INT,FP] Conditional reductions
;; ---- [INT] Tree reductions
;; ---- [FP] Tree reductions
;; ---- [FP] Left-to-right reductions
;;
;; == Permutes
;; ---- [INT,FP] General permutes
;; ---- [INT,FP] Special-purpose unary permutes
;; ---- [INT,FP] Special-purpose binary permutes
;; ---- [PRED] Special-purpose unary permutes
;; ---- [PRED] Special-purpose binary permutes
;;
;; == Conversions
;; ---- [INT<-INT] Packs
;; ---- [INT<-INT] Unpacks
;; ---- [INT<-FP] Conversions
;; ---- [INT<-FP] Packs
;; ---- [INT<-FP] Unpacks
;; ---- [FP<-INT] Conversions
;; ---- [FP<-INT] Packs
;; ---- [FP<-INT] Unpacks
;; ---- [FP<-FP] Packs
;; ---- [FP<-FP] Packs (bfloat16)
;; ---- [FP<-FP] Unpacks
;; ---- [PRED<-PRED] Packs
;; ---- [PRED<-PRED] Unpacks
;;
;; == Vector partitioning
;; ---- [PRED] Unary partitioning
;; ---- [PRED] Binary partitioning
;; ---- [PRED] Scalarization
;;
;; == Counting elements
;; ---- [INT] Count elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; ---- [INT] Count elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; ---- [INT] Decrement by the number of elements in a predicate (vector)
;; =========================================================================
;; == General notes
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- Note on the handling of big-endian SVE
;; -------------------------------------------------------------------------
;;
;; On big-endian systems, Advanced SIMD mov<mode> patterns act in the
;; same way as movdi or movti would: the first byte of memory goes
;; into the most significant byte of the register and the last byte
;; of memory goes into the least significant byte of the register.
;; This is the most natural ordering for Advanced SIMD and matches
;; the ABI layout for 64-bit and 128-bit vector types.
;;
;; As a result, the order of bytes within the register is what GCC
;; expects for a big-endian target, and subreg offsets therefore work
;; as expected, with the first element in memory having subreg offset 0
;; and the last element in memory having the subreg offset associated
;; with a big-endian lowpart.  However, this ordering also means that
;; GCC's lane numbering does not match the architecture's numbering:
;; GCC always treats the element at the lowest address in memory
;; (subreg offset 0) as element 0, while the architecture treats
;; the least significant end of the register as element 0.
;; The situation for SVE is different.  We want the layout of the
;; SVE register to be the same for mov<mode> as it is for maskload<mode>:
;; logically, a mov<mode> load must be indistinguishable from a
;; maskload<mode> whose mask is all true.  We therefore need the
;; register layout to match LD1 rather than LDR.  The ABI layout of
;; SVE types also matches LD1 byte ordering rather than LDR byte ordering.
;;
;; As a result, the architecture lane numbering matches GCC's lane
;; numbering, with element 0 always being the first in memory.
;; However:
;;
;; - Applying a subreg offset to a register does not give the element
;;   that GCC expects: the first element in memory has the subreg offset
;;   associated with a big-endian lowpart while the last element in memory
;;   has subreg offset 0.  We handle this via TARGET_CAN_CHANGE_MODE_CLASS.
;;
;; - We cannot use LDR and STR for spill slots that might be accessed
;;   via subregs, since although the elements have the order GCC expects,
;;   the order of the bytes within the elements is different.  We instead
;;   access spill slots via LD1 and ST1, using secondary reloads to
;;   reserve a predicate register.
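;;
;; As a concrete illustration (an editorial example, not text taken from
;; the architecture or ABI documents): take an int32_t array { a, b, c, d }
;; with "a" at the lowest address.  On a big-endian target, an Advanced
;; SIMD V4SI load places "a" in the most significant 32 bits of the
;; register, so GCC's element 0 ("a") is the architecture's element 3.
;; An SVE LD1W of the same array places "a" in architectural element 0,
;; so for SVE the two numbering schemes agree.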
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PTEST
;; -------------------------------------------------------------------------
;;
;; SVE provides a PTEST instruction for testing the active lanes of a
;; predicate and setting the flags based on the result.  The associated
;; condition code tests are:
;;
;; - any   (= ne): at least one active bit is set
;; - none  (= eq): all active bits are clear (*)
;; - first (= mi): the first active bit is set
;; - nfrst (= pl): the first active bit is clear (*)
;; - last  (= cc): the last active bit is set
;; - nlast (= cs): the last active bit is clear (*)
;;
;; where the conditions marked (*) are also true when there are no active
;; lanes (i.e. when the governing predicate is a PFALSE).  The flags results
;; of a PTEST use the condition code mode CC_NZC.
;;
;; PTEST is always a .B operation (i.e. it always operates on VNx16BI).
;; This means that for other predicate modes, we need a governing predicate
;; in which all bits are defined.
;;
;; For example, most predicated .H operations ignore the odd bits of the
;; governing predicate, so that an active lane is represented by the
;; bits "1x" and an inactive lane by the bits "0x", where "x" can be
;; any value.  To test a .H predicate, we instead need "10" and "00"
;; respectively, so that the condition only tests the even bits of the
;; predicate.
;;
;; Several instructions set the flags as a side-effect, in the same way
;; that a separate PTEST would.  It's important for code quality that we
;; use these flags results as often as possible, particularly in the case
;; of WHILE* and RDFFR.
;;
;; Also, some of the instructions that set the flags are unpredicated
;; and instead implicitly test all .B, .H, .S or .D elements, as though
;; they were predicated on a PTRUE of that size.  For example, a .S
;; WHILELO sets the flags in the same way as a PTEST with a .S PTRUE
;; would.
;;
;; We therefore need to represent PTEST operations in a way that
;; makes it easy to combine them with both predicated and unpredicated
;; operations, while using a VNx16BI governing predicate for all
;; predicate modes.  We do this using:
;;
;;   (unspec:CC_NZC [gp cast_gp ptrue_flag op] UNSPEC_PTEST)
;;
;; where:
;;
;; - GP is the real VNx16BI governing predicate
;;
;; - CAST_GP is GP cast to the mode of OP.  All bits dropped by casting
;;   GP to CAST_GP are guaranteed to be clear in GP.
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that CAST_GP (rather than GP) is all-true and
;;   SVE_MAYBE_NOT_PTRUE otherwise.
;;
;; - OP is the predicate we want to test, of the same mode as CAST_GP.
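;;
;; For illustration only (this exact pattern does not appear below), a
;; PTEST of a VNx8BI predicate whose cast governing predicate is known
;; to be all-true might be written:
;;
;;   (set (reg:CC_NZC CC_REGNUM)
;;        (unspec:CC_NZC
;;          [(match_operand:VNx16BI 1 "register_operand" "Upl")
;;           (match_operand:VNx8BI 2 "register_operand" "Upl")
;;           (const_int SVE_KNOWN_PTRUE)
;;           (match_operand:VNx8BI 3 "register_operand" "Upa")]
;;          UNSPEC_PTEST))
;;
;; where operand 1 is GP, operand 2 is CAST_GP (GP viewed as VNx8BI)
;; and operand 3 is OP.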
;; -------------------------------------------------------------------------
;; ---- Description of UNSPEC_PRED_Z
;; -------------------------------------------------------------------------
;;
;; SVE integer comparisons are predicated and return zero for inactive
;; lanes.  Sometimes we use them with predicates that are all-true and
;; sometimes we use them with general predicates.
;;
;; The integer comparisons also set the flags and so build in the effect
;; of a PTEST.  We therefore want to be able to combine integer comparison
;; patterns with PTESTs of the result.  One difficulty with doing this is
;; that (as noted above) the PTEST is always a .B operation and so can place
;; stronger requirements on the governing predicate than the comparison does.
;;
;; For example, when applying a separate PTEST to the result of a full-vector
;; .H comparison, the PTEST must be predicated on a .H PTRUE instead of a
;; .B PTRUE.  In contrast, the comparison might be predicated on either
;; a .H PTRUE or a .B PTRUE, since the values of odd-indexed predicate
;; bits don't matter for .H operations.
;;
;; We therefore can't rely on a full-vector comparison using the same
;; predicate register as a following PTEST.  We instead need to remember
;; whether a comparison is known to be a full-vector comparison and use
;; this information in addition to a check for equal predicate registers.
;; At the same time, it's useful to have a common representation for all
;; integer comparisons, so that they can be handled by a single set of
;; patterns.
;;
;; We therefore take a similar approach to UNSPEC_PTEST above and use:
;;
;;   (unspec:<M:VPRED> [gp ptrue_flag (code:M op0 op1)] UNSPEC_PRED_Z)
;;
;; where:
;;
;; - GP is the governing predicate, of mode <M:VPRED>
;;
;; - PTRUE_FLAG is a CONST_INT (conceptually of mode SI) that has the value
;;   SVE_KNOWN_PTRUE if we know that GP is all-true and SVE_MAYBE_NOT_PTRUE
;;   otherwise
;;
;; - CODE is the comparison code
;;
;; - OP0 and OP1 are the values being compared, of mode M
;;
;; The "Z" in UNSPEC_PRED_Z indicates that inactive lanes are zero.
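;;
;; For example (an illustrative sketch along the lines of the integer
;; comparison patterns later in the file), an equality comparison of two
;; VNx4SI vectors could be represented as:
;;
;;   (set (match_operand:VNx4BI 0 "register_operand" "=Upa")
;;        (unspec:VNx4BI
;;          [(match_operand:VNx4BI 1 "register_operand" "Upl")
;;           (const_int SVE_KNOWN_PTRUE)
;;           (eq:VNx4BI
;;             (match_operand:VNx4SI 2 "register_operand" "w")
;;             (match_operand:VNx4SI 3 "register_operand" "w"))]
;;          UNSPEC_PRED_Z))
;;
;; which describes a CMPEQ whose inactive result lanes are zero.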
;; -------------------------------------------------------------------------
;; ---- Note on predicated integer arithmetic and UNSPEC_PRED_X
;; -------------------------------------------------------------------------
;;
;; Many SVE integer operations are predicated.  We can generate them
;; from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;     We can make extra lanes active without changing the behavior
;;     (although for code-quality reasons we should avoid doing so
;;     needlessly).
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the function
;;     arguments.
;;
;; For (1) and (2) we need a way of attaching the predicate to a normal
;; unpredicated integer operation.  We do this using:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...))] UNSPEC_PRED_X)
;;
;; where (code:M (op0 op1 ...)) is the normal integer operation and PRED
;; is a predicate of mode <M:VPRED>.  PRED might or might not be a PTRUE;
;; it always is for (1), but might not be for (2).
;;
;; The unspec as a whole has the same value as (code:M ...) when PRED is
;; all-true.  It is always semantically valid to replace PRED with a PTRUE,
;; but as noted above, we should only do so if there's a specific benefit.
;;
;; (The "_X" in the unspec is named after the ACLE functions in (2).)
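;;
;; For example, a predicated addition of two VNx4SI vectors could be
;; represented as (an illustrative sketch):
;;
;;   (set (match_operand:VNx4SI 0 "register_operand" "=w")
;;        (unspec:VNx4SI
;;          [(match_operand:VNx4BI 1 "register_operand" "Upl")
;;           (plus:VNx4SI
;;             (match_operand:VNx4SI 2 "register_operand" "w")
;;             (match_operand:VNx4SI 3 "register_operand" "w"))]
;;          UNSPEC_PRED_X))
;;
;; which is the form used for sources (1) and (2).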
;; For (3) and (4) we can simply use the SVE port's normal representation
;; of a predicate-based select:
;;
;;   (unspec:M [pred (code:M (op0 op1 ...)) inactive] UNSPEC_SEL)
;;
;; where INACTIVE specifies the values of inactive lanes.
;;
;; We can also use the UNSPEC_PRED_X wrapper in the UNSPEC_SEL rather
;; than inserting the integer operation directly.  This is mostly useful
;; if we want the combine pass to merge an integer operation with an explicit
;; vcond_mask (in other words, with a following SEL instruction).  However,
;; it's generally better to merge such operations at the gimple level
;; using (3).
;; -------------------------------------------------------------------------
;; ---- Note on predicated FP arithmetic patterns and GP "strictness"
;; -------------------------------------------------------------------------
;;
;; Most SVE floating-point operations are predicated.  We can generate
;; them from four sources:
;;
;; (1) Using normal unpredicated optabs.  In this case we need to create
;;     an all-true predicate register to act as the governing predicate
;;     for the SVE instruction.  There are no inactive lanes, and thus
;;     the values of inactive lanes don't matter.
;;
;; (2) Using _x ACLE functions.  In this case the function provides a
;;     specific predicate and some lanes might be inactive.  However,
;;     as for (1), the values of the inactive lanes don't matter.
;;
;;     The instruction must have the same exception behavior as the
;;     function call unless things like command-line flags specifically
;;     allow otherwise.  For example, with -ffast-math, it is OK to
;;     raise exceptions for inactive lanes, but normally it isn't.
;;
;; (3) Using cond_* optabs that correspond to IFN_COND_* internal functions.
;;     These optabs have a predicate operand that specifies which lanes are
;;     active and another operand that provides the values of inactive lanes.
;;
;; (4) Using _m and _z ACLE functions.  These functions map to the same
;;     patterns as (3), with the _z functions setting inactive lanes to zero
;;     and the _m functions setting the inactive lanes to one of the function
;;     arguments.
;;
;; So:
;;
;; - In (1), the predicate is known to be all true and the pattern can use
;;   unpredicated operations where available.
;;
;; - In (2), the predicate might or might not be all true.  The pattern can
;;   use unpredicated instructions if the predicate is all-true or if things
;;   like command-line flags allow exceptions for inactive lanes.
;;
;; - (3) and (4) represent a native SVE predicated operation.  Some lanes
;;   might be inactive and inactive lanes of the result must have specific
;;   values.  There is no scope for using unpredicated instructions (and no
;;   reason to want to), so the question about command-line flags doesn't
;;   arise.
;; It would be inaccurate to model (2) as an rtx code like (sqrt ...)
;; in combination with a separate predicate operand, e.g.
;;
;;   (unspec [(match_operand:<VPRED> 1 "register_operand" "Upl")
;;            (sqrt:SVE_FULL_F 2 "register_operand" "w")]
;;           ....)
;;
;; because (sqrt ...) can raise an exception for any lane, including
;; inactive ones.  We therefore need to use an unspec instead.
;;
;; Also, (2) requires some way of distinguishing the case in which the
;; predicate might have inactive lanes and cannot be changed from the
;; case in which the predicate has no inactive lanes or can be changed.
;; This information is also useful when matching combined FP patterns
;; in which the predicates might not be equal.
;;
;; We therefore model FP operations as an unspec of the form:
;;
;;   (unspec [pred strictness op0 op1 ...] UNSPEC_COND_<MNEMONIC>)
;;
;; where:
;;
;; - PRED is the governing predicate.
;;
;; - STRICTNESS is a CONST_INT that conceptually has mode SI.  It has the
;;   value SVE_STRICT_GP if PRED might have inactive lanes and if those
;;   lanes must remain inactive.  It has the value SVE_RELAXED_GP otherwise.
;;
;; - OP0 OP1 ... are the normal input operands to the operation.
;;
;; - MNEMONIC is the mnemonic of the associated SVE instruction.
;; For (3) and (4), we combine these operations with an UNSPEC_SEL
;; that selects between the result of the FP operation and the "else"
;; value.  (This else value is a merge input for _m ACLE functions
;; and zero for _z ACLE functions.)  The outer pattern then has the form:
;;
;;   (unspec [pred fp_operation else_value] UNSPEC_SEL)
;;
;; This means that the patterns for (3) and (4) have two predicates:
;; one for the FP operation itself and one for the UNSPEC_SEL.
;; This pattern is equivalent to the result of combining an instance
;; of (1) or (2) with a separate vcond instruction, so these patterns
;; are useful as combine targets too.
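;;
;; As an illustrative sketch of this structure (not an exact pattern from
;; below), a merging FP addition for VNx4SF could look like:
;;
;;   (set (match_operand:VNx4SF 0 "register_operand")
;;        (unspec:VNx4SF
;;          [(match_operand:VNx4BI 1 "register_operand")
;;           (unspec:VNx4SF
;;             [(match_dup 1)
;;              (const_int SVE_STRICT_GP)
;;              (match_operand:VNx4SF 2 "register_operand")
;;              (match_operand:VNx4SF 3 "register_operand")]
;;             UNSPEC_COND_FADD)
;;           (match_operand:VNx4SF 4 "register_operand")]
;;          UNSPEC_SEL))
;;
;; with operand 4 providing the "else" value.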
;; However, in the combine case, the instructions that we want to
;; combine might use different predicates.  Then:
;;
;; - Some of the active lanes of the FP operation might be discarded
;;   by the UNSPEC_SEL.  It's OK to drop the FP operation on those lanes,
;;   even for SVE_STRICT_GP, since the operations on those lanes are
;;   effectively dead code.
;;
;; - Some of the inactive lanes of the FP operation might be selected
;;   by the UNSPEC_SEL, giving unspecified values for those lanes.
;;   SVE_RELAXED_GP lets us extend the FP operation to cover these
;;   extra lanes, but SVE_STRICT_GP does not.
;;
;; Thus SVE_RELAXED_GP allows us to ignore the predicate on the FP operation
;; and operate on exactly the lanes selected by the UNSPEC_SEL predicate.
;; This typically leads to patterns like:
;;
;;   (unspec [(match_operand 1 "register_operand" "Upl")
;;            (unspec [(match_operand N)
;;                     (const_int SVE_RELAXED_GP)
;;                     ...]
;;                    UNSPEC_COND_<MNEMONIC>)
;;            ...]
;;           UNSPEC_SEL)
;;
;; where operand N is allowed to be anything.  These instructions then
;; have rewrite rules to replace operand N with operand 1, which gives the
;; instructions a canonical form and means that the original operand N is
;; not kept live unnecessarily.
;; In contrast, SVE_STRICT_GP only allows the UNSPEC_SEL predicate to be
;; a subset of the FP operation predicate.  This case isn't interesting
;; for FP operations that have an all-true predicate, since such operations
;; use SVE_RELAXED_GP instead.  And it is not possible for instruction
;; conditions to track the subset relationship for arbitrary registers.
;; So in practice, the only useful case for SVE_STRICT_GP is the one
;; in which the predicates match:
;;
;;   (unspec [(match_operand 1 "register_operand" "Upl")
;;            (unspec [(match_dup 1)
;;                     (const_int SVE_STRICT_GP)
;;                     ...]
;;                    UNSPEC_COND_<MNEMONIC>)
;;            ...]
;;           UNSPEC_SEL)
;;
;; This pattern would also be correct for SVE_RELAXED_GP, but it would
;; be redundant with the one above.  However, if the combine pattern
;; has multiple FP operations, using a match_operand allows combinations
;; of SVE_STRICT_GP and SVE_RELAXED_GP in the same operation, provided
;; that the predicates are the same:
;;
;;   (unspec [(match_operand 1 "register_operand" "Upl")
;;            ...
;;            (unspec [(match_dup 1)
;;                     (match_operand:SI N "aarch64_sve_gp_strictness")
;;                     ...]
;;                    UNSPEC_COND_<MNEMONIC1>)
;;            (unspec [(match_dup 1)
;;                     (match_operand:SI M "aarch64_sve_gp_strictness")
;;                     ...]
;;                    UNSPEC_COND_<MNEMONIC2>) ...]
;;           UNSPEC_SEL)
;;
;; The fully-relaxed version of this pattern is:
;;
;;   (unspec [(match_operand 1 "register_operand" "Upl")
;;            ...
;;            (unspec [(match_operand N)
;;                     (const_int SVE_RELAXED_GP)
;;                     ...]
;;                    UNSPEC_COND_<MNEMONIC1>)
;;            (unspec [(match_operand M)
;;                     (const_int SVE_RELAXED_GP)
;;                     ...]
;;                    UNSPEC_COND_<MNEMONIC2>) ...]
;;           UNSPEC_SEL)
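;;
;; Instantiating the single-operation SVE_RELAXED_GP form above for FADD
;; on VNx4SF would, as an illustrative sketch, give:
;;
;;   (unspec [(match_operand:VNx4BI 1 "register_operand" "Upl")
;;            (unspec [(match_operand 4)
;;                     (const_int SVE_RELAXED_GP)
;;                     (match_operand:VNx4SF 2 "register_operand" "w")
;;                     (match_operand:VNx4SF 3 "register_operand" "w")]
;;                    UNSPEC_COND_FADD)
;;            (match_operand:VNx4SF 5 "register_operand" "w")]
;;           UNSPEC_SEL)
;;
;; with a rewrite rule that replaces operand 4 with operand 1.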
;; -------------------------------------------------------------------------
;; ---- Note on FFR handling
;; -------------------------------------------------------------------------
;;
;; Logically we want to divide FFR-related instructions into regions
;; that contain exactly one of:
;;
;; - a single write to the FFR
;; - any number of reads from the FFR (but only one read is likely)
;; - any number of LDFF1 and LDNF1 instructions
;;
;; However, LDFF1 and LDNF1 instructions should otherwise behave like
;; normal loads as far as possible.  This means that they should be
;; schedulable within a region in the same way that LD1 would be,
;; and they should be deleted as dead if the result is unused.  The loads
;; should therefore not write to the FFR, since that would both serialize
;; the loads with respect to each other and keep the loads live for any
;; later RDFFRs.
;;
;; We get around this by using a fake "FFR token" (FFRT) to help describe
;; the dependencies.  Writing to the FFRT starts a new "FFRT region",
;; while using the FFRT keeps the instruction within its region.
;; Specifically:
;;
;; - Writes start a new FFRT region as well as setting the FFR:
;;
;;       W1: parallel (FFRT = <new value>, FFR = <actual FFR value>)
;;
;; - Loads use an LD1-like instruction that also uses the FFRT, so that the
;;   loads stay within the same FFRT region:
;;
;;       L1: load data while using the FFRT
;;
;;   In addition, any FFRT region that includes a load also has at least one
;;   instance of:
;;
;;       L2: FFR = update (FFR, FFRT)  [type == no_insn]
;;
;;   to make it clear that the region both reads from and writes to the FFR.
;;
;; - Reads do the following:
;;
;;       R1: FFRT = FFR  [type == no_insn]
;;       R2: read from the FFRT
;;       R3: FFRT = update (FFRT)  [type == no_insn]
;;
;;   R1 and R3 both create new FFRT regions, so that previous LDFF1s and
;;   LDNF1s cannot move forwards across R1 and later LDFF1s and LDNF1s
;;   cannot move backwards across R3.
;;
;; This way, writes are only kept alive by later loads or reads,
;; and write/read pairs fold normally.  For two consecutive reads,
;; the first R3 is made dead by the second R1, which in turn becomes
;; redundant with the first R1.  We then have:
;;
;;   first R1: FFRT = FFR
;;   first read from the FFRT
;;   second read from the FFRT
;;   second R3: FFRT = update (FFRT)
;;
;; i.e. the two FFRT regions collapse into a single one with two
;; independent reads.
;;
;; The model still prevents some valid optimizations though.  For example,
;; if all loads in an FFRT region are deleted as dead, nothing would remove
;; the L2 instructions.
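;;
;; As an illustrative sketch, the RTL for a single FFR read under this
;; scheme (corresponding to the aarch64_copy_ffr_to_ffrt, aarch64_rdffr
;; and aarch64_update_ffrt patterns below) is:
;;
;;   (set (reg:VNx16BI FFRT_REGNUM) (reg:VNx16BI FFR_REGNUM))        ; R1
;;   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
;;        (reg:VNx16BI FFRT_REGNUM))                                 ; R2
;;   (set (reg:VNx16BI FFRT_REGNUM)
;;        (unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)]
;;                        UNSPEC_UPDATE_FFRT))                       ; R3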
;; =========================================================================
;; == Moves
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- Moves of single vectors
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV    (including aliases)
;; - LD1B   (contiguous form)
;; - LD1D   (    "    "     )
;; - LD1H   (    "    "     )
;; - LD1W   (    "    "     )
;; - LDR
;; - ST1B   (contiguous form)
;; - ST1D   (    "    "     )
;; - ST1H   (    "    "     )
;; - ST1W   (    "    "     )
;; - STR
;; -------------------------------------------------------------------------
(define_expand "mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
	(match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Use the predicated load and store patterns where possible.
       This is required for big-endian targets (see the comment at the
       head of the file) and increases the addressing choices for
       little-endian.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
	&& can_create_pseudo_p ())
      {
	aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
	DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
	aarch64_expand_mov_immediate (operands[0], operands[1]);
	DONE;
      }

    /* Optimize subregs on big-endian targets: we can use REV[BHW]
       instead of going through memory.  */
    if (BYTES_BIG_ENDIAN
	&& aarch64_maybe_expand_sve_subreg_move (operands[0], operands[1]))
      DONE;
  }
)
(define_expand "movmisalign<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
	(match_operand:SVE_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Equivalent to a normal move for our purposes.  */
    emit_move_insn (operands[0], operands[1]);
    DONE;
  }
)
;; Unpredicated moves that can use LDR and STR, i.e. full vectors for which
;; little-endian ordering is acceptable.  Only allow memory operations during
;; and after RA; before RA we want the predicated load and store patterns to
;; be used instead.
(define_insn "*aarch64_sve_mov<mode>_ldr_str"
  [(set (match_operand:SVE_FULL 0 "aarch64_sve_nonimmediate_operand")
	(match_operand:SVE_FULL 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE
   && (<MODE>mode == VNx16QImode || !BYTES_BIG_ENDIAN)
   && ((lra_in_progress || reload_completed)
       || (register_operand (operands[0], <MODE>mode)
	   && nonmemory_operand (operands[1], <MODE>mode)))"
  {@ [ cons: =0 , 1   ]
     [ w        , Utr ] ldr\t%0, %1
     [ Utr      , w   ] str\t%1, %0
     [ w        , w   ] mov\t%0.d, %1.d
     [ w        , Dn  ] << aarch64_output_sve_mov_immediate (operands[1]);
  }
)
;; Unpredicated moves that cannot use LDR and STR, i.e. partial vectors
;; or vectors for which little-endian ordering isn't acceptable.  Memory
;; accesses require secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_no_ldr_str"
  [(set (match_operand:SVE_ALL 0 "register_operand")
	(match_operand:SVE_ALL 1 "aarch64_nonmemory_operand"))]
  "TARGET_SVE
   && <MODE>mode != VNx16QImode
   && (BYTES_BIG_ENDIAN
       || maybe_ne (BYTES_PER_SVE_VECTOR, GET_MODE_SIZE (<MODE>mode)))"
  {@ [ cons: =0 , 1  ]
     [ w        , w  ] mov\t%0.d, %1.d
     [ w        , Dn ] << aarch64_output_sve_mov_immediate (operands[1]);
  }
)
;; Handle memory reloads for modes that can't use LDR and STR.  We use
;; a byte PTRUE for all modes to try to encourage reuse.  This pattern
;; needs constraints because it is returned by TARGET_SECONDARY_RELOAD.
(define_expand "aarch64_sve_reload_mem"
  [(parallel
     [(set (match_operand 0)
	   (match_operand 1))
      (clobber (match_operand:VNx16BI 2 "register_operand" "=Upl"))])]
  "TARGET_SVE"
  {
    /* Create a PTRUE.  */
    emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));

    /* Refer to the PTRUE in the appropriate mode for this move.  */
    machine_mode mode = GET_MODE (operands[0]);
    rtx pred = gen_lowpart (aarch64_sve_pred_mode (mode), operands[2]);

    /* Emit a predicated load or store.  */
    aarch64_emit_sve_pred_move (operands[0], pred, operands[1]);
    DONE;
  }
)
;; A predicated move in which the predicate is known to be all-true.
;; Note that this pattern is generated directly by aarch64_emit_sve_pred_move,
;; so changes to this pattern will need changes there as well.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_ALL 0 "nonimmediate_operand")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 1 "register_operand")
	   (match_operand:SVE_ALL 2 "nonimmediate_operand")]
	  UNSPEC_PRED_X))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  {@ [ cons: =0 , 1   , 2  ]
     [ w        , Upl , w  ] #
     [ w        , Upl , m  ] ld1<Vesize>\t%0.<Vctype>, %1/z, %2
     [ m        , Upl , w  ] st1<Vesize>\t%2.<Vctype>, %1, %0
  }
  "&& register_operand (operands[0], <MODE>mode)
   && register_operand (operands[2], <MODE>mode)"
  [(set (match_dup 0) (match_dup 2))]
)
;; A pattern for optimizing SUBREGs that have a reinterpreting effect
;; on big-endian targets; see aarch64_maybe_expand_sve_subreg_move
;; for details.  We use a special predicate for operand 2 to reduce
;; the number of patterns.
(define_insn_and_split "*aarch64_sve_mov<mode>_subreg_be"
  [(set (match_operand:SVE_ALL 0 "aarch64_sve_nonimmediate_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:VNx16BI 1 "register_operand" "Upl")
	   (match_operand 2 "aarch64_any_register_operand" "w")]
	  UNSPEC_REV_SUBREG))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_sve_subreg_move (operands[0], operands[1], operands[2]);
    DONE;
  }
)
;; Reinterpret operand 1 in operand 0's mode, without changing its contents.
;; This is equivalent to a subreg on little-endian targets but not for
;; big-endian; see the comment at the head of the file for details.
(define_expand "@aarch64_sve_reinterpret<mode>"
  [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand")
	(unspec:SVE_ALL_STRUCT
	  [(match_operand 1 "aarch64_any_register_operand")]
	  UNSPEC_REINTERPRET))]
  "TARGET_SVE"
  {
    machine_mode src_mode = GET_MODE (operands[1]);
    if (targetm.can_change_mode_class (<MODE>mode, src_mode, FP_REGS))
      {
	emit_move_insn (operands[0], gen_lowpart (<MODE>mode, operands[1]));
	DONE;
      }
  }
)
;; A pattern for handling type punning on big-endian targets.  We use a
;; special predicate for operand 1 to reduce the number of patterns.
(define_insn_and_split "*aarch64_sve_reinterpret<mode>"
  [(set (match_operand:SVE_ALL_STRUCT 0 "register_operand" "=w")
	(unspec:SVE_ALL_STRUCT
	  [(match_operand 1 "aarch64_any_register_operand" "w")]
	  UNSPEC_REINTERPRET))]
  "TARGET_SVE"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = aarch64_replace_reg_mode (operands[1], <MODE>mode);
  }
)
;; -------------------------------------------------------------------------
;; ---- Moves of multiple vectors
;; -------------------------------------------------------------------------
;; All patterns in this section are synthetic and split to real
;; instructions after reload.
;; -------------------------------------------------------------------------
(define_expand "mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "nonimmediate_operand")
	(match_operand:SVE_STRUCT 1 "general_operand"))]
  "TARGET_SVE"
  {
    /* Big-endian loads and stores need to be done via LD1 and ST1;
       see the comment at the head of the file for details.  */
    if ((MEM_P (operands[0]) || MEM_P (operands[1]))
	&& BYTES_BIG_ENDIAN)
      {
	gcc_assert (can_create_pseudo_p ());
	aarch64_expand_sve_mem_move (operands[0], operands[1], <VPRED>mode);
	DONE;
      }

    if (CONSTANT_P (operands[1]))
      {
	aarch64_expand_mov_immediate (operands[0], operands[1]);
	DONE;
      }
  }
)
;; Unpredicated structure moves (little-endian).
(define_insn "*aarch64_sve_mov<mode>_le"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand" "=w, Utr, w, w")
	(match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand" "Utr, w, w, Dn"))]
  "TARGET_SVE && !BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)
;; Unpredicated structure moves (big-endian).  Memory accesses require
;; secondary reloads.
(define_insn "*aarch64_sve_mov<mode>_be"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w, w")
	(match_operand:SVE_STRUCT 1 "aarch64_nonmemory_operand" "w, Dn"))]
  "TARGET_SVE && BYTES_BIG_ENDIAN"
  "#"
  [(set_attr "length" "<insn_length>")]
)
;; Split unpredicated structure moves into pieces.  This is the same
;; for both big-endian and little-endian code, although it only needs
;; to handle memory operands for little-endian code.
(define_split
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_nonimmediate_operand")
	(match_operand:SVE_STRUCT 1 "aarch64_sve_general_operand"))]
  "TARGET_SVE && reload_completed"
  [(const_int 0)]
  {
    rtx dest = operands[0];
    rtx src = operands[1];
    if (REG_P (dest) && REG_P (src))
      aarch64_simd_emit_reg_reg_move (operands, <VSINGLE>mode, <vector_count>);
    else
      for (unsigned int i = 0; i < <vector_count>; ++i)
	{
	  rtx subdest = simplify_gen_subreg (<VSINGLE>mode, dest, <MODE>mode,
					     i * BYTES_PER_SVE_VECTOR);
	  rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, src, <MODE>mode,
					    i * BYTES_PER_SVE_VECTOR);
	  emit_insn (gen_rtx_SET (subdest, subsrc));
	}
    DONE;
  }
)
;; Predicated structure moves.  This works for both endiannesses but in
;; practice is only useful for big-endian.
(define_insn_and_split "@aarch64_pred_mov<mode>"
  [(set (match_operand:SVE_STRUCT 0 "aarch64_sve_struct_nonimmediate_operand" "=w, w, Utx")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 1 "register_operand" "Upl, Upl, Upl")
	   (match_operand:SVE_STRUCT 2 "aarch64_sve_struct_nonimmediate_operand" "w, Utx, w")]
	  UNSPEC_PRED_X))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[2], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    for (unsigned int i = 0; i < <vector_count>; ++i)
      {
	rtx subdest = simplify_gen_subreg (<VSINGLE>mode, operands[0],
					   <MODE>mode,
					   i * BYTES_PER_SVE_VECTOR);
	rtx subsrc = simplify_gen_subreg (<VSINGLE>mode, operands[2],
					  <MODE>mode,
					  i * BYTES_PER_SVE_VECTOR);
	aarch64_emit_sve_pred_move (subdest, operands[1], subsrc);
      }
    DONE;
  }
  [(set_attr "length" "<insn_length>")]
)
;; -------------------------------------------------------------------------
;; ---- Moves of predicates
;; -------------------------------------------------------------------------
;; Includes:
;; - MOV
;; - LDR
;; - PFALSE
;; - PTRUE
;; - PTRUES
;; - STR
;; -------------------------------------------------------------------------
(define_expand "mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
	(match_operand:PRED_ALL 1 "general_operand"))]
  "TARGET_SVE"
  {
    if (GET_CODE (operands[0]) == MEM)
      operands[1] = force_reg (<MODE>mode, operands[1]);

    if (CONSTANT_P (operands[1]))
      {
	aarch64_expand_mov_immediate (operands[0], operands[1]);
	DONE;
      }
  }
)
(define_insn "*aarch64_sve_mov<mode>"
  [(set (match_operand:PRED_ALL 0 "nonimmediate_operand")
	(match_operand:PRED_ALL 1 "aarch64_mov_operand"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  {@ [ cons: =0 , 1   ]
     [ Upa      , Upa ] mov\t%0.b, %1.b
     [ m        , Upa ] str\t%1, %0
     [ Upa      , m   ] ldr\t%0, %1
     [ Upa      , Dn  ] << aarch64_output_sve_mov_immediate (operands[1]);
  }
)
;; Match PTRUES Pn.B when both the predicate and flags are useful.
(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 2)
	   (match_operand 3)
	   (const_int SVE_KNOWN_PTRUE)
	   (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
	     [(unspec:VNx16BI
		[(match_operand:SI 4 "const_int_operand")
		 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
		UNSPEC_PTRUE)])]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(match_dup 1))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
  }
)
;; Match PTRUES Pn.[HSD] when both the predicate and flags are useful.
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 2)
	   (match_operand 3)
	   (const_int SVE_KNOWN_PTRUE)
	   (subreg:PRED_HSD
	     (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
	       [(unspec:VNx16BI
		  [(match_operand:SI 4 "const_int_operand")
		   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
		  UNSPEC_PTRUE)]) 0)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(match_dup 1))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)
;; Match PTRUES Pn.B when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptruevnx16bi_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 2)
	   (match_operand 3)
	   (const_int SVE_KNOWN_PTRUE)
	   (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
	     [(unspec:VNx16BI
		[(match_operand:SI 4 "const_int_operand")
		 (match_operand:VNx16BI 5 "aarch64_simd_imm_zero")]
		UNSPEC_PTRUE)])]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = operands[3] = CONSTM1_RTX (VNx16BImode);
  }
)
;; Match PTRUES Pn.[HSD] when only the flags result is useful (which is
;; a way of testing VL).
(define_insn_and_rewrite "*aarch64_sve_ptrue<mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 2)
	   (match_operand 3)
	   (const_int SVE_KNOWN_PTRUE)
	   (subreg:PRED_HSD
	     (match_operator:VNx16BI 1 "aarch64_sve_ptrue_svpattern_immediate"
	       [(unspec:VNx16BI
		  [(match_operand:SI 4 "const_int_operand")
		   (match_operand:PRED_HSD 5 "aarch64_simd_imm_zero")]
		  UNSPEC_PTRUE)]) 0)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_ptrues (operands[1]);
  }
  "&& (!CONSTANT_P (operands[2]) || !CONSTANT_P (operands[3]))"
  {
    operands[2] = CONSTM1_RTX (VNx16BImode);
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)
;; -------------------------------------------------------------------------
;; ---- Moves of multiple predicates
;; -------------------------------------------------------------------------

(define_insn_and_split "mov<mode>"
  [(set (match_operand:SVE_STRUCT_BI 0 "nonimmediate_operand")
	(match_operand:SVE_STRUCT_BI 1 "aarch64_mov_operand"))]
  "TARGET_SVE
   && (register_operand (operands[0], <MODE>mode)
       || register_operand (operands[1], <MODE>mode))"
  "#"
  "&& reload_completed"
  [(const_int 0)]
  {
    aarch64_split_move (operands[0], operands[1], VNx16BImode);
    DONE;
  }
)
;; -------------------------------------------------------------------------
;; ---- Moves relating to the FFR
;; -------------------------------------------------------------------------
;; RDFFR
;; RDFFRS
;; SETFFR
;; WRFFR
;; -------------------------------------------------------------------------
;; [W1 in the block comment above about FFR handling]
;;
;; Write to the FFR and start a new FFRT scheduling region.
(define_insn "aarch64_wrffr"
  [(set (reg:VNx16BI FFR_REGNUM)
	(match_operand:VNx16BI 0 "aarch64_simd_reg_or_minus_one"))
   (set (reg:VNx16BI FFRT_REGNUM)
	(unspec:VNx16BI [(match_dup 0)] UNSPEC_WRFFR))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [ cons: 0  ]
     [ Dm       ] setffr
     [ Upa      ] wrffr\t%0.b
  }
)
;; [L2 in the block comment above about FFR handling]
;;
;; Introduce a read from and write to the FFR in the current FFRT region,
;; so that the FFR value is live on entry to the region and so that the FFR
;; value visibly changes within the region.  This is used (possibly multiple
;; times) in an FFRT region that includes LDFF1 or LDNF1 instructions.
(define_insn "aarch64_update_ffr_for_load"
  [(set (reg:VNx16BI FFR_REGNUM)
	(unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)
			 (reg:VNx16BI FFR_REGNUM)] UNSPEC_UPDATE_FFR))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)
;; [R1 in the block comment above about FFR handling]
;;
;; Notionally copy the FFR to the FFRT, so that the current FFR value
;; can be read from there by the RDFFR instructions below.  This acts
;; as a scheduling barrier for earlier LDFF1 and LDNF1 instructions and
;; creates a natural dependency with earlier writes.
(define_insn "aarch64_copy_ffr_to_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
	(reg:VNx16BI FFR_REGNUM))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)
;; [R2 in the block comment above about FFR handling]
;;
;; Read the FFR via the FFRT.
(define_insn "aarch64_rdffr"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  "rdffr\t%0.b"
)
;; Likewise with zero predication.
(define_insn "aarch64_rdffr_z"
  [(set (match_operand:VNx16BI 0 "register_operand")
	(and:VNx16BI
	  (reg:VNx16BI FFRT_REGNUM)
	  (match_operand:VNx16BI 1 "register_operand")))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
     [ &Upa    , Upa ; yes                 ] rdffr\t%0.b, %1/z
     [ ?Upa    , 0Upa; yes                 ] ^
     [ Upa     , Upa ; no                  ] ^
  }
)
;; Read the FFR to test for a fault, without using the predicate result.
(define_insn "*aarch64_rdffr_z_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand")
	   (match_dup 1)
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (and:VNx16BI
	     (reg:VNx16BI FFRT_REGNUM)
	     (match_dup 1))]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
     [ &Upa    , Upa ; yes                 ] rdffrs\t%0.b, %1/z
     [ ?Upa    , 0Upa; yes                 ] ^
     [ Upa     , Upa ; no                  ] ^
  }
)
;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand")
	   (match_dup 1)
	   (const_int SVE_KNOWN_PTRUE)
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
     [ &Upa    , Upa ; yes                 ] rdffrs\t%0.b, %1/z
     [ ?Upa    , 0Upa; yes                 ] ^
     [ Upa     , Upa ; no                  ] ^
  }
)
;; Read the FFR with zero predication and test the result.
(define_insn "*aarch64_rdffr_z_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand")
	   (match_dup 1)
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (and:VNx16BI
	     (reg:VNx16BI FFRT_REGNUM)
	     (match_dup 1))]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand")
	(and:VNx16BI
	  (reg:VNx16BI FFRT_REGNUM)
	  (match_dup 1)))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
     [ &Upa    , Upa ; yes                 ] rdffrs\t%0.b, %1/z
     [ ?Upa    , 0Upa; yes                 ] ^
     [ Upa     , Upa ; no                  ] ^
  }
)
;; Same for unpredicated RDFFR when tested with a known PTRUE.
(define_insn "*aarch64_rdffr_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand")
	   (match_dup 1)
	   (const_int SVE_KNOWN_PTRUE)
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand")
	(reg:VNx16BI FFRT_REGNUM))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [ cons: =0, 1   ; attrs: pred_clobber ]
     [ &Upa    , Upa ; yes                 ] rdffrs\t%0.b, %1/z
     [ ?Upa    , 0Upa; yes                 ] ^
     [ Upa     , Upa ; no                  ] ^
  }
)
;; [R3 in the block comment above about FFR handling]
;;
;; Arbitrarily update the FFRT after a read from the FFR.  This acts as
;; a scheduling barrier for later LDFF1 and LDNF1 instructions.
(define_insn "aarch64_update_ffrt"
  [(set (reg:VNx16BI FFRT_REGNUM)
	(unspec:VNx16BI [(reg:VNx16BI FFRT_REGNUM)] UNSPEC_UPDATE_FFRT))]
  "TARGET_SVE"
  ""
  [(set_attr "type" "no_insn")]
)
;; =========================================================================
;; == Loads
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- Normal contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1D
;; - LD1H
;; - LD1W
;; - LD2B
;; - LD2D
;; - LD2H
;; - LD2W
;; - LD3B
;; - LD3D
;; - LD3H
;; - LD3W
;; - LD4B
;; - LD4D
;; - LD4H
;; - LD4W
;; -------------------------------------------------------------------------
;; Predicated LD1 (single).
(define_insn "maskload<mode><vpred>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_ALL 1 "memory_operand" "m")
	   (match_operand:SVE_ALL 3 "aarch64_maskload_else_operand")]
	  UNSPEC_LD1_SVE))]
  "TARGET_SVE"
  "ld1<Vesize>\t%0.<Vctype>, %2/z, %1"
)
;; Unpredicated LD[234].
(define_expand "vec_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand")
	(unspec:SVE_STRUCT
	  [(match_dup 2)
	   (match_operand:SVE_STRUCT 1 "memory_operand")
	   (match_dup 3)]
	  UNSPEC_LDN))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
    operands[3] = CONST0_RTX (<MODE>mode);
  }
)
;; Predicated LD[234].
(define_insn "vec_mask_load_lanes<mode><vsingle>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
	(unspec:SVE_STRUCT
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_STRUCT 1 "memory_operand" "m")
	   (match_operand 3 "aarch64_maskload_else_operand")]
	  UNSPEC_LDN))]
  "TARGET_SVE"
  "ld<vector_count><Vesize>\t%0, %2/z, %1"
)
;; -------------------------------------------------------------------------
;; ---- Extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LD1B
;; - LD1H
;; - LD1SB
;; - LD1SH
;; - LD1SW
;; - LD1W
;; -------------------------------------------------------------------------
;; Predicated load and extend, with 8 elements per 128-bit block.
(define_insn_and_rewrite "@aarch64_load_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
	(unspec:SVE_HSDI
	  [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
	   (ANY_EXTEND:SVE_HSDI
	     (unspec:SVE_PARTIAL_I
	       [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
		(match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")
		(match_operand:SVE_PARTIAL_I 4 "aarch64_maskload_else_operand")]
	       UNSPEC_LD1_SVE))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)
;; Same as above without the maskload_else_operand to still allow combine to
;; match a sign-extended pred_mov pattern.
(define_insn_and_rewrite "*aarch64_load_<ANY_EXTEND:optab>_mov<SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
	(unspec:SVE_HSDI
	  [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
	   (ANY_EXTEND:SVE_HSDI
	     (unspec:SVE_PARTIAL_I
	       [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
		(match_operand:SVE_PARTIAL_I 1 "memory_operand" "m")]
	       UNSPEC_PRED_X))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)
;; -------------------------------------------------------------------------
;; ---- First-faulting contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LDFF1B
;; - LDFF1D
;; - LDFF1H
;; - LDFF1W
;; - LDNF1B
;; - LDNF1D
;; - LDNF1H
;; - LDNF1W
;; -------------------------------------------------------------------------
;; Contiguous non-extending first-faulting or non-faulting loads.
(define_insn "@aarch64_ld<fn>f1<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_FULL 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
	   (reg:VNx16BI FFRT_REGNUM)]
	  UNSPEC_LD<FN>F1))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  "ld<fn>f1<Vesize>\t%0.<Vetype>, %2/z, %1"
)
;; -------------------------------------------------------------------------
;; ---- First-faulting extending contiguous loads
;; -------------------------------------------------------------------------
;; Includes contiguous forms of:
;; - LDFF1B
;; - LDFF1H
;; - LDFF1SB
;; - LDFF1SH
;; - LDFF1SW
;; - LDFF1W
;; - LDNF1B
;; - LDNF1H
;; - LDNF1SB
;; - LDNF1SH
;; - LDNF1SW
;; - LDNF1W
;; -------------------------------------------------------------------------
;; Predicated first-faulting or non-faulting load and extend.
(define_insn_and_rewrite "@aarch64_ld<fn>f1_<ANY_EXTEND:optab><SVE_HSDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_HSDI 0 "register_operand" "=w")
	(unspec:SVE_HSDI
	  [(match_operand:<SVE_HSDI:VPRED> 3 "general_operand" "UplDnm")
	   (ANY_EXTEND:SVE_HSDI
	     (unspec:SVE_PARTIAL_I
	       [(match_operand:<SVE_PARTIAL_I:VPRED> 2 "register_operand" "Upl")
		(match_operand:SVE_PARTIAL_I 1 "aarch64_sve_ld<fn>f1_operand" "Ut<fn>")
		(reg:VNx16BI FFRT_REGNUM)]
	       UNSPEC_LD<FN>F1))]
	  UNSPEC_PRED_X))]
  "TARGET_SVE
   && TARGET_NON_STREAMING
   && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  "ld<fn>f1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vctype>, %2/z, %1"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<SVE_HSDI:VPRED>mode);
  }
)
;; -------------------------------------------------------------------------
;; ---- Non-temporal contiguous loads
;; -------------------------------------------------------------------------
;; Includes:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------
;; Predicated contiguous non-temporal load (single).
(define_insn "@aarch64_ldnt1<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 2 "register_operand" "Upl")
	   (match_operand:SVE_FULL 1 "memory_operand" "m")
	   (match_operand:SVE_FULL 3 "aarch64_maskload_else_operand")]
	  UNSPEC_LDNT1_SVE))]
  "TARGET_SVE"
  "ldnt1<Vesize>\t%0.<Vetype>, %2/z, %1"
)
;; -------------------------------------------------------------------------
;; ---- Normal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1D
;; - LD1W
;; -------------------------------------------------------------------------
;; Unpredicated gather loads.
(define_expand "gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_24 0 "register_operand")
	(unspec:SVE_24
	  [(match_dup 5)
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
	   (match_operand:<V_INT_CONTAINER> 2 "register_operand")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
	   (match_dup 6)
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
    operands[6] = CONST0_RTX (<MODE>mode);
  }
)
;; Predicated gather loads for 32-bit elements.  Operand 3 is true for
;; unsigned extension and false for signed extension.
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_4 0 "register_operand")
	(unspec:SVE_4
	  [(match_operand:VNx4BI 5 "register_operand")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
	   (match_operand:VNx4SI 2 "register_operand")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
	   (match_operand:SVE_4 6 "aarch64_maskload_else_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [cons: =0, 1, 2, 3, 4, 5]
     [&w, Z,   w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s]
     [?w, Z,   0, Ui1, Ui1, Upl] ^
     [&w, vgw, w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%2.s, #%1]
     [?w, vgw, 0, Ui1, Ui1, Upl] ^
     [&w, rk,  w, Z,   Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
     [?w, rk,  0, Z,   Ui1, Upl] ^
     [&w, rk,  w, Ui1, Ui1, Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
     [?w, rk,  0, Ui1, Ui1, Upl] ^
     [&w, rk,  w, Z,   i,   Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
     [?w, rk,  0, Z,   i,   Upl] ^
     [&w, rk,  w, Ui1, i,   Upl] ld1<Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
     [?w, rk,  0, Ui1, i,   Upl] ^
  }
)
;; Predicated gather loads for 64-bit elements.  The value of operand 3
;; doesn't matter in this case.
(define_insn "mask_gather_load<mode><v_int_container>"
  [(set (match_operand:SVE_2 0 "register_operand")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand")
	   (match_operand:DI 1 "aarch64_sve_gather_offset_<Vesize>")
	   (match_operand:VNx2DI 2 "register_operand")
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
	   (match_operand:SVE_2 6 "aarch64_maskload_else_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [cons: =0, 1, 2, 3, 4, 5]
     [&w, Z,   w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d]
     [?w, Z,   0, i, Ui1, Upl] ^
     [&w, vgd, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%2.d, #%1]
     [?w, vgd, 0, i, Ui1, Upl] ^
     [&w, rk,  w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d]
     [?w, rk,  0, i, Ui1, Upl] ^
     [&w, rk,  w, i, i,   Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
     [?w, rk,  0, i, i,   Upl] ^
  }
)
;; Likewise, but with the offset being extended from 32 bits.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_<su>xtw_unpacked"
  [(set (match_operand:SVE_2 0 "register_operand")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand")
	   (match_operand:DI 1 "register_operand")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (ANY_EXTEND:VNx2DI
		(match_operand:VNx2SI 2 "register_operand"))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
	   (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [cons: =0, 1, 2, 3, 4, 5]
     [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw]
     [?w, rk, 0, i, Ui1, Upl] ^
     [&w, rk, w, i, i,   Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, <su>xtw %p4]
     [?w, rk, 0, i, i,   Upl] ^
  }
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
;; Likewise, but with the offset being truncated to 32 bits and then
;; sign-extended.
(define_insn_and_rewrite "*mask_gather_load<mode><v_int_container>_sxtw"
  [(set (match_operand:SVE_2 0 "register_operand")
	(unspec:SVE_2
	  [(match_operand:VNx2BI 5 "register_operand")
	   (match_operand:DI 1 "register_operand")
	   (unspec:VNx2DI
	     [(match_operand 6)
	      (sign_extend:VNx2DI
		(truncate:VNx2SI
		  (match_operand:VNx2DI 2 "register_operand")))]
	     UNSPEC_PRED_X)
	   (match_operand:DI 3 "const_int_operand")
	   (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
	   (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
	   (mem:BLK (scratch))]
	  UNSPEC_LD1_GATHER))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {@ [cons: =0, 1, 2, 3, 4, 5]
     [&w, rk, w, i, Ui1, Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
     [?w, rk, 0, i, Ui1, Upl] ^
     [&w, rk, w, i, i,   Upl] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
     [?w, rk, 0, i, i,   Upl] ^
  }
  "&& !CONSTANT_P (operands[6])"
  {
    operands[6] = CONSTM1_RTX (VNx2BImode);
  }
)
1610 ;; Likewise, but with the offset being truncated to 32 bits and then
1612 (define_insn "*mask_gather_load<mode><v_int_container>_uxtw"
1613 [(set (match_operand:SVE_2 0 "register_operand")
1615 [(match_operand:VNx2BI 5 "register_operand")
1616 (match_operand:DI 1 "register_operand")
1618 (match_operand:VNx2DI 2 "register_operand")
1619 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1620 (match_operand:DI 3 "const_int_operand")
1621 (match_operand:DI 4 "aarch64_gather_scale_operand_<Vesize>")
1622 (match_operand:SVE_2 7 "aarch64_maskload_else_operand")
1623 (mem:BLK (scratch))]
1624 UNSPEC_LD1_GATHER))]
1625 "TARGET_SVE && TARGET_NON_STREAMING"
1626 {@ [cons: =0, 1, 2, 3, 4, 5]
1627 [&w, rk, w, i, Ui1, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1628 [?w, rk, 0, i, Ui1, Upl ] ^
1629 [&w, rk, w, i, i, Upl ] ld1<Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1630 [?w, rk, 0, i, i, Upl ] ^
1631 }
1632 )
1634 ;; -------------------------------------------------------------------------
1635 ;; ---- Extending gather loads
1636 ;; -------------------------------------------------------------------------
1637 ;; Includes gather forms of:
1638 ;; - LD1B
1639 ;; - LD1H
1640 ;; - LD1SB
1641 ;; - LD1SH
1642 ;; - LD1SW
1643 ;; - LD1W
1644 ;; -------------------------------------------------------------------------
1646 ;; Predicated extending gather loads for 32-bit elements. Operand 3 is
1647 ;; true for unsigned extension and false for signed extension.
1648 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_4HSI:mode><SVE_4BHI:mode>"
1649 [(set (match_operand:SVE_4HSI 0 "register_operand")
1650 (unspec:SVE_4HSI
1651 [(match_operand:VNx4BI 6 "general_operand")
1652 (ANY_EXTEND:SVE_4HSI
1653 (unspec:SVE_4BHI
1654 [(match_operand:VNx4BI 5 "register_operand")
1655 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_4BHI:Vesize>")
1656 (match_operand:VNx4SI 2 "register_operand")
1657 (match_operand:DI 3 "const_int_operand")
1658 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_4BHI:Vesize>")
1659 (match_operand:SVE_4BHI 7 "aarch64_maskload_else_operand")
1660 (mem:BLK (scratch))]
1661 UNSPEC_LD1_GATHER))]
1662 UNSPEC_PRED_X))]
1663 "TARGET_SVE
1664 && TARGET_NON_STREAMING
1665 && (~<SVE_4HSI:narrower_mask> & <SVE_4BHI:self_mask>) == 0"
1666 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1667 [&w, Z, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s]
1668 [?w, Z, 0, Ui1, Ui1, Upl, UplDnm] ^
1669 [&w, vg<SVE_4BHI:Vesize>, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1670 [?w, vg<SVE_4BHI:Vesize>, 0, Ui1, Ui1, Upl, UplDnm] ^
1671 [&w, rk, w, Z, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1672 [?w, rk, 0, Z, Ui1, Upl, UplDnm] ^
1673 [&w, rk, w, Ui1, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1674 [?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^
1675 [&w, rk, w, Z, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1676 [?w, rk, 0, Z, i, Upl, UplDnm] ^
1677 [&w, rk, w, Ui1, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_4BHI:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1678 [?w, rk, 0, Ui1, i, Upl, UplDnm] ^
1679 }
1680 "&& !CONSTANT_P (operands[6])"
1681 {
1682 operands[6] = CONSTM1_RTX (VNx4BImode);
1683 }
1684 )
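;; A hedged ACLE-level example of one route to this pattern (the wrapper
;; name is invented; the uxtw byte-offset alternative is shown):
;;   #include <arm_sve.h>
;;   svint32_t
;;   load_bytes (svbool_t pg, const int8_t *base, svuint32_t offsets)
;;   {
;;     /* Plausibly: ld1sb z0.s, p0/z, [x0, z1.s, uxtw].  */
;;     return svld1sb_gather_u32offset_s32 (pg, base, offsets);
;;   }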
1686 ;; Predicated extending gather loads for 64-bit elements. The value of
1687 ;; operand 3 doesn't matter in this case.
1688 (define_insn_and_rewrite "@aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>"
1689 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1690 (unspec:SVE_2HSDI
1691 [(match_operand:VNx2BI 6 "general_operand")
1692 (ANY_EXTEND:SVE_2HSDI
1693 (unspec:SVE_2BHSI
1694 [(match_operand:VNx2BI 5 "register_operand")
1695 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_2BHSI:Vesize>")
1696 (match_operand:VNx2DI 2 "register_operand")
1697 (match_operand:DI 3 "const_int_operand")
1698 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1699 (match_operand:SVE_2BHSI 7 "aarch64_maskload_else_operand")
1700 (mem:BLK (scratch))]
1701 UNSPEC_LD1_GATHER))]
1702 UNSPEC_PRED_X))]
1703 "TARGET_SVE
1704 && TARGET_NON_STREAMING
1705 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1706 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1707 [&w, Z, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d]
1708 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
1709 [&w, vg<SVE_2BHSI:Vesize>, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%2.d, #%1]
1710 [?w, vg<SVE_2BHSI:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1711 [&w, rk, w, i, Ui1, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d]
1712 [?w, rk, 0, i, Ui1, Upl, UplDnm] ^
1713 [&w, rk, w, i, i, Upl, UplDnm] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1714 [?w, rk, 0, i, i, Upl, UplDnm] ^
1715 }
1716 "&& !CONSTANT_P (operands[6])"
1717 {
1718 operands[6] = CONSTM1_RTX (VNx2BImode);
1719 }
1720 )
1722 ;; Likewise, but with the offset being extended from 32 bits.
1723 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_<ANY_EXTEND2:su>xtw_unpacked"
1724 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1725 (unspec:SVE_2HSDI
1726 [(match_operand 6)
1727 (ANY_EXTEND:SVE_2HSDI
1728 (unspec:SVE_2BHSI
1729 [(match_operand:VNx2BI 5 "register_operand")
1730 (match_operand:DI 1 "aarch64_reg_or_zero")
1731 (unspec:VNx2DI
1732 [(match_operand 7)
1733 (ANY_EXTEND2:VNx2DI
1734 (match_operand:VNx2SI 2 "register_operand"))]
1735 UNSPEC_PRED_X)
1736 (match_operand:DI 3 "const_int_operand")
1737 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1738 (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
1739 (mem:BLK (scratch))]
1740 UNSPEC_LD1_GATHER))]
1741 UNSPEC_PRED_X))]
1742 "TARGET_SVE
1743 && TARGET_NON_STREAMING
1744 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1745 {@ [cons: =0, 1, 2, 3, 4, 5]
1746 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw]
1747 [?w, rk, 0, i, Ui1, Upl ] ^
1748 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, <ANY_EXTEND2:su>xtw %p4]
1749 [?w, rk, 0, i, i, Upl ] ^
1750 }
1751 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1752 {
1753 operands[6] = CONSTM1_RTX (VNx2BImode);
1754 operands[7] = CONSTM1_RTX (VNx2BImode);
1755 }
1756 )
1758 ;; Likewise, but with the offset being truncated to 32 bits and then
1759 ;; sign-extended.
1760 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_sxtw"
1761 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1762 (unspec:SVE_2HSDI
1763 [(match_operand 6)
1764 (ANY_EXTEND:SVE_2HSDI
1765 (unspec:SVE_2BHSI
1766 [(match_operand:VNx2BI 5 "register_operand")
1767 (match_operand:DI 1 "aarch64_reg_or_zero")
1768 (unspec:VNx2DI
1769 [(match_operand 7)
1770 (sign_extend:VNx2DI
1771 (truncate:VNx2SI
1772 (match_operand:VNx2DI 2 "register_operand")))]
1773 UNSPEC_PRED_X)
1774 (match_operand:DI 3 "const_int_operand")
1775 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1776 (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
1777 (mem:BLK (scratch))]
1778 UNSPEC_LD1_GATHER))]
1779 UNSPEC_PRED_X))]
1780 "TARGET_SVE
1781 && TARGET_NON_STREAMING
1782 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1783 {@ [cons: =0, 1, 2, 3, 4, 5]
1784 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
1785 [?w, rk, 0, i, Ui1, Upl ] ^
1786 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1787 [?w, rk, 0, i, i, Upl ] ^
1788 }
1789 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
1790 {
1791 operands[6] = CONSTM1_RTX (VNx2BImode);
1792 operands[7] = CONSTM1_RTX (VNx2BImode);
1793 }
1794 )
1796 ;; Likewise, but with the offset being truncated to 32 bits and then
1797 ;; zero-extended.
1798 (define_insn_and_rewrite "*aarch64_gather_load_<ANY_EXTEND:optab><SVE_2HSDI:mode><SVE_2BHSI:mode>_uxtw"
1799 [(set (match_operand:SVE_2HSDI 0 "register_operand")
1800 (unspec:SVE_2HSDI
1801 [(match_operand 7)
1802 (ANY_EXTEND:SVE_2HSDI
1803 (unspec:SVE_2BHSI
1804 [(match_operand:VNx2BI 5 "register_operand")
1805 (match_operand:DI 1 "aarch64_reg_or_zero")
1806 (and:VNx2DI
1807 (match_operand:VNx2DI 2 "register_operand")
1808 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1809 (match_operand:DI 3 "const_int_operand")
1810 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_2BHSI:Vesize>")
1811 (match_operand:SVE_2BHSI 8 "aarch64_maskload_else_operand")
1812 (mem:BLK (scratch))]
1813 UNSPEC_LD1_GATHER))]
1814 UNSPEC_PRED_X))]
1815 "TARGET_SVE
1816 && TARGET_NON_STREAMING
1817 && (~<SVE_2HSDI:narrower_mask> & <SVE_2BHSI:self_mask>) == 0"
1818 {@ [cons: =0, 1, 2, 3, 4, 5]
1819 [&w, rk, w, i, Ui1, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
1820 [?w, rk, 0, i, Ui1, Upl ] ^
1821 [&w, rk, w, i, i, Upl ] ld1<ANY_EXTEND:s><SVE_2BHSI:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1822 [?w, rk, 0, i, i, Upl ] ^
1823 }
1824 "&& !CONSTANT_P (operands[7])"
1825 {
1826 operands[7] = CONSTM1_RTX (VNx2BImode);
1827 }
1828 )
1830 ;; -------------------------------------------------------------------------
1831 ;; ---- First-faulting gather loads
1832 ;; -------------------------------------------------------------------------
1833 ;; Includes gather forms of:
1834 ;; - LDFF1D
1835 ;; - LDFF1W
1836 ;; -------------------------------------------------------------------------
1838 ;; Predicated first-faulting gather loads for 32-bit elements. Operand
1839 ;; 3 is true for unsigned extension and false for signed extension.
1840 (define_insn "@aarch64_ldff1_gather<mode>"
1841 [(set (match_operand:SVE_FULL_S 0 "register_operand")
1842 (unspec:SVE_FULL_S
1843 [(match_operand:VNx4BI 5 "register_operand")
1844 (match_operand:DI 1 "aarch64_sve_gather_offset_w")
1845 (match_operand:VNx4SI 2 "register_operand")
1846 (match_operand:DI 3 "const_int_operand")
1847 (match_operand:DI 4 "aarch64_gather_scale_operand_w")
1848 (mem:BLK (scratch))
1849 (reg:VNx16BI FFRT_REGNUM)]
1850 UNSPEC_LDFF1_GATHER))]
1851 "TARGET_SVE && TARGET_NON_STREAMING"
1852 {@ [cons: =0, 1, 2, 3, 4, 5 ]
1853 [&w, Z, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s]
1854 [?w, Z, 0, i, Ui1, Upl] ^
1855 [&w, vgw, w, i, Ui1, Upl] ldff1w\t%0.s, %5/z, [%2.s, #%1]
1856 [?w, vgw, 0, i, Ui1, Upl] ^
1857 [&w, rk, w, Z, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw]
1858 [?w, rk, 0, Z, Ui1, Upl] ^
1859 [&w, rk, w, Ui1, Ui1, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw]
1860 [?w, rk, 0, Ui1, Ui1, Upl] ^
1861 [&w, rk, w, Z, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1862 [?w, rk, 0, Z, i, Upl] ^
1863 [&w, rk, w, Ui1, i, Upl] ldff1w\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1864 [?w, rk, 0, Ui1, i, Upl] ^
1865 }
1866 )
1868 ;; Predicated first-faulting gather loads for 64-bit elements. The value
1869 ;; of operand 3 doesn't matter in this case.
1870 (define_insn "@aarch64_ldff1_gather<mode>"
1871 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1872 (unspec:SVE_FULL_D
1873 [(match_operand:VNx2BI 5 "register_operand")
1874 (match_operand:DI 1 "aarch64_sve_gather_offset_d")
1875 (match_operand:VNx2DI 2 "register_operand")
1876 (match_operand:DI 3 "const_int_operand")
1877 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1878 (mem:BLK (scratch))
1879 (reg:VNx16BI FFRT_REGNUM)]
1880 UNSPEC_LDFF1_GATHER))]
1881 "TARGET_SVE && TARGET_NON_STREAMING"
1882 {@ [cons: =0, 1, 2, 3, 4, 5 ]
1883 [&w, Z, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d]
1884 [?w, Z, 0, i, Ui1, Upl ] ^
1885 [&w, vgd, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%2.d, #%1]
1886 [?w, vgd, 0, i, Ui1, Upl ] ^
1887 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d]
1888 [?w, rk, 0, i, Ui1, Upl ] ^
1889 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, lsl %p4]
1890 [?w, rk, 0, i, i, Upl ] ^
1891 }
1892 )
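;; A sketch of the usual first-faulting idiom at the intrinsics level
;; (wrapper name invented; SETFFR/RDFFR manage the FFR that these
;; patterns read):
;;   #include <arm_sve.h>
;;   svuint64_t
;;   ff_gather (svbool_t pg, svuint64_t bases, svbool_t *loaded)
;;   {
;;     svsetffr ();
;;     svuint64_t data = svldff1_gather_u64base_u64 (pg, bases);
;;     *loaded = svrdffr_z (pg);  /* lanes that actually loaded */
;;     return data;
;;   }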
1894 ;; Likewise, but with the offset being sign-extended from 32 bits.
1895 (define_insn_and_rewrite "*aarch64_ldff1_gather<mode>_sxtw"
1896 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1897 (unspec:SVE_FULL_D
1898 [(match_operand:VNx2BI 5 "register_operand")
1899 (match_operand:DI 1 "register_operand")
1900 (unspec:VNx2DI
1901 [(match_operand 6)
1902 (sign_extend:VNx2DI
1903 (truncate:VNx2SI
1904 (match_operand:VNx2DI 2 "register_operand")))]
1905 UNSPEC_PRED_X)
1906 (match_operand:DI 3 "const_int_operand")
1907 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1908 (mem:BLK (scratch))
1909 (reg:VNx16BI FFRT_REGNUM)]
1910 UNSPEC_LDFF1_GATHER))]
1911 "TARGET_SVE && TARGET_NON_STREAMING"
1912 {@ [cons: =0, 1, 2, 3, 4, 5]
1913 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw]
1914 [?w, rk, 0, i, Ui1, Upl ] ^
1915 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
1916 [?w, rk, 0, i, i, Upl ] ^
1917 }
1918 "&& !CONSTANT_P (operands[6])"
1919 {
1920 operands[6] = CONSTM1_RTX (VNx2BImode);
1921 }
1922 )
1924 ;; Likewise, but with the offset being zero-extended from 32 bits.
1925 (define_insn "*aarch64_ldff1_gather<mode>_uxtw"
1926 [(set (match_operand:SVE_FULL_D 0 "register_operand")
1927 (unspec:SVE_FULL_D
1928 [(match_operand:VNx2BI 5 "register_operand")
1929 (match_operand:DI 1 "register_operand")
1930 (and:VNx2DI
1931 (match_operand:VNx2DI 2 "register_operand")
1932 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
1933 (match_operand:DI 3 "const_int_operand")
1934 (match_operand:DI 4 "aarch64_gather_scale_operand_d")
1935 (mem:BLK (scratch))
1936 (reg:VNx16BI FFRT_REGNUM)]
1937 UNSPEC_LDFF1_GATHER))]
1938 "TARGET_SVE && TARGET_NON_STREAMING"
1939 {@ [cons: =0, 1, 2, 3, 4, 5]
1940 [&w, rk, w, i, Ui1, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw]
1941 [?w, rk, 0, i, Ui1, Upl ] ^
1942 [&w, rk, w, i, i, Upl ] ldff1d\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
1943 [?w, rk, 0, i, i, Upl ] ^
1944 }
1945 )
1947 ;; -------------------------------------------------------------------------
1948 ;; ---- First-faulting extending gather loads
1949 ;; -------------------------------------------------------------------------
1950 ;; Includes gather forms of:
1951 ;; - LDFF1B
1952 ;; - LDFF1H
1953 ;; - LDFF1SB
1954 ;; - LDFF1SH
1955 ;; - LDFF1SW
1956 ;; - LDFF1W
1957 ;; -------------------------------------------------------------------------
1959 ;; Predicated extending first-faulting gather loads for 32-bit elements.
1960 ;; Operand 3 is true for unsigned extension and false for signed extension.
1961 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx4_WIDE:mode><VNx4_NARROW:mode>"
1962 [(set (match_operand:VNx4_WIDE 0 "register_operand")
1963 (unspec:VNx4_WIDE
1964 [(match_operand:VNx4BI 6 "general_operand")
1965 (ANY_EXTEND:VNx4_WIDE
1966 (unspec:VNx4_NARROW
1967 [(match_operand:VNx4BI 5 "register_operand")
1968 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>")
1969 (match_operand:VNx4_WIDE 2 "register_operand")
1970 (match_operand:DI 3 "const_int_operand")
1971 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>")
1972 (mem:BLK (scratch))
1973 (reg:VNx16BI FFRT_REGNUM)]
1974 UNSPEC_LDFF1_GATHER))]
1975 UNSPEC_PRED_X))]
1976 "TARGET_SVE && TARGET_NON_STREAMING"
1977 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
1978 [&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s]
1979 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
1980 [&w, vg<VNx4_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%2.s, #%1]
1981 [?w, vg<VNx4_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
1982 [&w, rk, w, Z, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw]
1983 [?w, rk, 0, Z, Ui1, Upl, UplDnm] ^
1984 [&w, rk, w, Ui1, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw]
1985 [?w, rk, 0, Ui1, Ui1, Upl, UplDnm] ^
1986 [&w, rk, w, Z, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, sxtw %p4]
1987 [?w, rk, 0, Z, i, Upl, UplDnm] ^
1988 [&w, rk, w, Ui1, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx4_NARROW:Vesize>\t%0.s, %5/z, [%1, %2.s, uxtw %p4]
1989 [?w, rk, 0, Ui1, i, Upl, UplDnm] ^
1990 }
1991 "&& !CONSTANT_P (operands[6])"
1992 {
1993 operands[6] = CONSTM1_RTX (VNx4BImode);
1994 }
1995 )
1997 ;; Predicated extending first-faulting gather loads for 64-bit elements.
1998 ;; The value of operand 3 doesn't matter in this case.
1999 (define_insn_and_rewrite "@aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>"
2000 [(set (match_operand:VNx2_WIDE 0 "register_operand")
2001 (unspec:VNx2_WIDE
2002 [(match_operand:VNx2BI 6 "general_operand")
2003 (ANY_EXTEND:VNx2_WIDE
2004 (unspec:VNx2_NARROW
2005 [(match_operand:VNx2BI 5 "register_operand")
2006 (match_operand:DI 1 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>")
2007 (match_operand:VNx2_WIDE 2 "register_operand")
2008 (match_operand:DI 3 "const_int_operand")
2009 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2010 (mem:BLK (scratch))
2011 (reg:VNx16BI FFRT_REGNUM)]
2012 UNSPEC_LDFF1_GATHER))]
2013 UNSPEC_PRED_X))]
2014 "TARGET_SVE && TARGET_NON_STREAMING"
2015 {@ [cons: =0, 1, 2, 3, 4, 5, 6]
2016 [&w, Z, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d]
2017 [?w, Z, 0, i, Ui1, Upl, UplDnm] ^
2018 [&w, vg<VNx2_NARROW:Vesize>, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%2.d, #%1]
2019 [?w, vg<VNx2_NARROW:Vesize>, 0, i, Ui1, Upl, UplDnm] ^
2020 [&w, rk, w, i, Ui1, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d]
2021 [?w, rk, 0, i, Ui1, Upl, UplDnm] ^
2022 [&w, rk, w, i, i, Upl, UplDnm] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, lsl %p4]
2023 [?w, rk, 0, i, i, Upl, UplDnm] ^
2024 }
2025 "&& !CONSTANT_P (operands[6])"
2026 {
2027 operands[6] = CONSTM1_RTX (VNx2BImode);
2028 }
2029 )
2031 ;; Likewise, but with the offset being sign-extended from 32 bits.
2032 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_sxtw"
2033 [(set (match_operand:VNx2_WIDE 0 "register_operand")
2034 (unspec:VNx2_WIDE
2035 [(match_operand 6)
2036 (ANY_EXTEND:VNx2_WIDE
2037 (unspec:VNx2_NARROW
2038 [(match_operand:VNx2BI 5 "register_operand")
2039 (match_operand:DI 1 "aarch64_reg_or_zero")
2040 (unspec:VNx2DI
2041 [(match_operand 7)
2042 (sign_extend:VNx2DI
2043 (truncate:VNx2SI
2044 (match_operand:VNx2DI 2 "register_operand")))]
2045 UNSPEC_PRED_X)
2046 (match_operand:DI 3 "const_int_operand")
2047 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2048 (mem:BLK (scratch))
2049 (reg:VNx16BI FFRT_REGNUM)]
2050 UNSPEC_LDFF1_GATHER))]
2051 UNSPEC_PRED_X))]
2052 "TARGET_SVE && TARGET_NON_STREAMING"
2053 {@ [cons: =0, 1, 2, 3, 4, 5]
2054 [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw]
2055 [?w, rk, 0, i, Ui1, Upl ] ^
2056 [&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, sxtw %p4]
2057 [?w, rk, 0, i, i, Upl ] ^
2058 }
2059 "&& (!CONSTANT_P (operands[6]) || !CONSTANT_P (operands[7]))"
2060 {
2061 operands[6] = CONSTM1_RTX (VNx2BImode);
2062 operands[7] = CONSTM1_RTX (VNx2BImode);
2063 }
2064 )
2066 ;; Likewise, but with the offset being zero-extended from 32 bits.
2067 (define_insn_and_rewrite "*aarch64_ldff1_gather_<ANY_EXTEND:optab><VNx2_WIDE:mode><VNx2_NARROW:mode>_uxtw"
2068 [(set (match_operand:VNx2_WIDE 0 "register_operand")
2069 (unspec:VNx2_WIDE
2070 [(match_operand 7)
2071 (ANY_EXTEND:VNx2_WIDE
2072 (unspec:VNx2_NARROW
2073 [(match_operand:VNx2BI 5 "register_operand")
2074 (match_operand:DI 1 "aarch64_reg_or_zero")
2075 (and:VNx2DI
2076 (match_operand:VNx2DI 2 "register_operand")
2077 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2078 (match_operand:DI 3 "const_int_operand")
2079 (match_operand:DI 4 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>")
2080 (mem:BLK (scratch))
2081 (reg:VNx16BI FFRT_REGNUM)]
2082 UNSPEC_LDFF1_GATHER))]
2083 UNSPEC_PRED_X))]
2084 "TARGET_SVE && TARGET_NON_STREAMING"
2085 {@ [cons: =0, 1, 2, 3, 4, 5]
2086 [&w, rk, w, i, Ui1, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw]
2087 [?w, rk, 0, i, Ui1, Upl ] ^
2088 [&w, rk, w, i, i, Upl ] ldff1<ANY_EXTEND:s><VNx2_NARROW:Vesize>\t%0.d, %5/z, [%1, %2.d, uxtw %p4]
2089 [?w, rk, 0, i, i, Upl ] ^
2090 }
2091 "&& !CONSTANT_P (operands[7])"
2092 {
2093 operands[7] = CONSTM1_RTX (VNx2BImode);
2094 }
2095 )
2097 ;; =========================================================================
2098 ;; == Prefetches
2099 ;; =========================================================================
2101 ;; -------------------------------------------------------------------------
2102 ;; ---- Contiguous prefetches
2103 ;; -------------------------------------------------------------------------
2104 ;; Includes contiguous forms of:
2105 ;; - PRFB
2106 ;; - PRFD
2107 ;; - PRFH
2108 ;; - PRFW
2109 ;; -------------------------------------------------------------------------
2111 ;; Contiguous predicated prefetches. Operand 2 gives the real prefetch
2112 ;; operation (as an svprfop), with operands 3 and 4 providing distilled
2113 ;; rw and locality data.
2114 (define_insn "@aarch64_sve_prefetch<mode>"
2115 [(prefetch (unspec:DI
2116 [(match_operand:<VPRED> 0 "register_operand" "Upl")
2117 (match_operand:SVE_FULL_I 1 "aarch64_sve_prefetch_operand" "UP<Vesize>")
2118 (match_operand:DI 2 "const_int_operand")]
2119 UNSPEC_SVE_PREFETCH)
2120 (match_operand:DI 3 "const_int_operand")
2121 (match_operand:DI 4 "const_int_operand"))]
2122 "TARGET_SVE"
2123 {
2124 operands[1] = gen_rtx_MEM (<MODE>mode, operands[1]);
2125 return aarch64_output_sve_prefetch ("prf<Vesize>", operands[2], "%0, %1");
2126 }
2127 )
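;; For example (a hedged sketch; the wrapper is invented and the encoding
;; of the svprfop into operands 2-4 is the compiler's business):
;;   #include <arm_sve.h>
;;   void
;;   prefetch_words (svbool_t pg, const void *addr)
;;   {
;;     /* Plausibly: prfw pldl1keep, p0, [x0].  */
;;     svprfw (pg, addr, SV_PLDL1KEEP);
;;   }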
2129 ;; -------------------------------------------------------------------------
2130 ;; ---- Gather prefetches
2131 ;; -------------------------------------------------------------------------
2132 ;; Includes gather forms of:
2133 ;; - PRFB
2134 ;; - PRFD
2135 ;; - PRFH
2136 ;; - PRFW
2137 ;; -------------------------------------------------------------------------
2139 ;; Predicated gather prefetches for 32-bit bases and offsets.  The operands
2140 ;; are as follows:
2141 ;; 0: the governing predicate
2142 ;; 1: the scalar component of the address
2143 ;; 2: the vector component of the address
2144 ;; 3: 1 for zero extension, 0 for sign extension
2145 ;; 4: the scale multiplier
2146 ;; 5: a vector zero that identifies the mode of data being accessed
2147 ;; 6: the prefetch operator (an svprfop)
2148 ;; 7: the normal RTL prefetch rw flag
2149 ;; 8: the normal RTL prefetch locality value
2150 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx4SI_ONLY:mode>"
2151 [(prefetch (unspec:DI
2152 [(match_operand:VNx4BI 0 "register_operand" "Upl, Upl, Upl, Upl, Upl, Upl")
2153 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk, rk, rk")
2154 (match_operand:VNx4SI_ONLY 2 "register_operand" "w, w, w, w, w, w")
2155 (match_operand:DI 3 "const_int_operand" "i, i, Z, Ui1, Z, Ui1")
2156 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2157 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2158 (match_operand:DI 6 "const_int_operand")]
2159 UNSPEC_SVE_PREFETCH_GATHER)
2160 (match_operand:DI 7 "const_int_operand")
2161 (match_operand:DI 8 "const_int_operand"))]
2162 "TARGET_SVE && TARGET_NON_STREAMING"
2164 static const char *const insns[][2] = {
2165 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s]",
2166 "prf<SVE_FULL_I:Vesize>", "%0, [%2.s, #%1]",
2167 "prfb", "%0, [%1, %2.s, sxtw]",
2168 "prfb", "%0, [%1, %2.s, uxtw]",
2169 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, sxtw %p4]",
2170 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.s, uxtw %p4]"
2172 const char *const *parts = insns[which_alternative];
2173 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2174 }
2175 )
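;; A hedged intrinsics-level example (wrapper invented; byte-offset form):
;;   #include <arm_sve.h>
;;   void
;;   prefetch_gather (svbool_t pg, const void *base, svuint32_t offsets)
;;   {
;;     /* Plausibly: prfb pldl1keep, p0, [x0, z0.s, uxtw].  */
;;     svprfb_gather_u32offset (pg, base, offsets, SV_PLDL1KEEP);
;;   }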
2177 ;; Predicated gather prefetches for 64-bit elements. The value of operand 3
2178 ;; doesn't matter in this case.
2179 (define_insn "@aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>"
2180 [(prefetch (unspec:DI
2181 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl, Upl, Upl")
2182 (match_operand:DI 1 "aarch64_sve_gather_offset_<SVE_FULL_I:Vesize>" "Z, vg<SVE_FULL_I:Vesize>, rk, rk")
2183 (match_operand:VNx2DI_ONLY 2 "register_operand" "w, w, w, w")
2184 (match_operand:DI 3 "const_int_operand")
2185 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, Ui1, Ui1, i")
2186 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2187 (match_operand:DI 6 "const_int_operand")]
2188 UNSPEC_SVE_PREFETCH_GATHER)
2189 (match_operand:DI 7 "const_int_operand")
2190 (match_operand:DI 8 "const_int_operand"))]
2191 "TARGET_SVE && TARGET_NON_STREAMING"
2193 static const char *const insns[][2] = {
2194 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d]",
2195 "prf<SVE_FULL_I:Vesize>", "%0, [%2.d, #%1]",
2196 "prfb", "%0, [%1, %2.d]",
2197 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, lsl %p4]"
2199 const char *const *parts = insns[which_alternative];
2200 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2201 }
2202 )
2204 ;; Likewise, but with the offset being sign-extended from 32 bits.
2205 (define_insn_and_rewrite "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_sxtw"
2206 [(prefetch (unspec:DI
2207 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2208 (match_operand:DI 1 "register_operand" "rk, rk")
2209 (unspec:VNx2DI_ONLY
2210 [(match_operand 9)
2211 (sign_extend:VNx2DI_ONLY
2212 (truncate:VNx2SI
2213 (match_operand:VNx2DI 2 "register_operand" "w, w")))]
2214 UNSPEC_PRED_X)
2215 (match_operand:DI 3 "const_int_operand")
2216 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2217 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2218 (match_operand:DI 6 "const_int_operand")]
2219 UNSPEC_SVE_PREFETCH_GATHER)
2220 (match_operand:DI 7 "const_int_operand")
2221 (match_operand:DI 8 "const_int_operand"))]
2222 "TARGET_SVE && TARGET_NON_STREAMING"
2224 static const char *const insns[][2] = {
2225 "prfb", "%0, [%1, %2.d, sxtw]",
2226 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, sxtw %p4]"
2228 const char *const *parts = insns[which_alternative];
2229 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2230 }
2231 "&& !rtx_equal_p (operands[0], operands[9])"
2232 {
2233 operands[9] = copy_rtx (operands[0]);
2234 }
2235 )
2237 ;; Likewise, but with the offset being zero-extended from 32 bits.
2238 (define_insn "*aarch64_sve_gather_prefetch<SVE_FULL_I:mode><VNx2DI_ONLY:mode>_uxtw"
2239 [(prefetch (unspec:DI
2240 [(match_operand:VNx2BI 0 "register_operand" "Upl, Upl")
2241 (match_operand:DI 1 "register_operand" "rk, rk")
2242 (and:VNx2DI
2243 (match_operand:VNx2DI 2 "register_operand" "w, w")
2244 (match_operand:VNx2DI 9 "aarch64_sve_uxtw_immediate"))
2245 (match_operand:DI 3 "const_int_operand")
2246 (match_operand:DI 4 "aarch64_gather_scale_operand_<SVE_FULL_I:Vesize>" "Ui1, i")
2247 (match_operand:SVE_FULL_I 5 "aarch64_simd_imm_zero")
2248 (match_operand:DI 6 "const_int_operand")]
2249 UNSPEC_SVE_PREFETCH_GATHER)
2250 (match_operand:DI 7 "const_int_operand")
2251 (match_operand:DI 8 "const_int_operand"))]
2252 "TARGET_SVE && TARGET_NON_STREAMING"
2254 static const char *const insns[][2] = {
2255 "prfb", "%0, [%1, %2.d, uxtw]",
2256 "prf<SVE_FULL_I:Vesize>", "%0, [%1, %2.d, uxtw %p4]"
2258 const char *const *parts = insns[which_alternative];
2259 return aarch64_output_sve_prefetch (parts[0], operands[6], parts[1]);
2260 }
2261 )
2263 ;; =========================================================================
2264 ;; == Stores
2265 ;; =========================================================================
2267 ;; -------------------------------------------------------------------------
2268 ;; ---- Normal contiguous stores
2269 ;; -------------------------------------------------------------------------
2270 ;; Includes contiguous forms of:
2271 ;; - ST1B
2272 ;; - ST1D
2273 ;; - ST1H
2274 ;; - ST1W
2275 ;; - ST2B
2276 ;; - ST2D
2277 ;; - ST2H
2278 ;; - ST2W
2279 ;; - ST3B
2280 ;; - ST3D
2281 ;; - ST3H
2282 ;; - ST3W
2283 ;; - ST4B
2284 ;; - ST4D
2285 ;; - ST4H
2286 ;; - ST4W
2287 ;; -------------------------------------------------------------------------
2289 ;; Predicated ST1 (single).
2290 (define_insn "maskstore<mode><vpred>"
2291 [(set (match_operand:SVE_ALL 0 "memory_operand" "+m")
2292 (unspec:SVE_ALL
2293 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2294 (match_operand:SVE_ALL 1 "register_operand" "w")
2295 (match_dup 0)]
2296 UNSPEC_ST1_SVE))]
2297 "TARGET_SVE"
2298 "st1<Vesize>\t%1.<Vctype>, %2, %0"
2299 )
2301 ;; Unpredicated ST[234]. This is always a full update, so the dependence
2302 ;; on the old value of the memory location (via (match_dup 0)) is redundant.
2303 ;; There doesn't seem to be any obvious benefit to treating the all-true
2304 ;; case differently though. In particular, it's very unlikely that we'll
2305 ;; only find out during RTL that a store_lanes is dead.
2306 (define_expand "vec_store_lanes<mode><vsingle>"
2307 [(set (match_operand:SVE_STRUCT 0 "memory_operand")
2308 (unspec:SVE_STRUCT
2309 [(match_dup 2)
2310 (match_operand:SVE_STRUCT 1 "register_operand")
2311 (match_dup 0)]
2312 UNSPEC_STN))]
2313 "TARGET_SVE"
2314 {
2315 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
2316 }
2317 )
2319 ;; Predicated ST[234].
2320 (define_insn "vec_mask_store_lanes<mode><vsingle>"
2321 [(set (match_operand:SVE_STRUCT 0 "memory_operand" "+m")
2322 (unspec:SVE_STRUCT
2323 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2324 (match_operand:SVE_STRUCT 1 "register_operand" "w")
2325 (match_dup 0)]
2326 UNSPEC_STN))]
2327 "TARGET_SVE"
2328 "st<vector_count><Vesize>\t%1, %2, %0"
2329 )
2331 ;; -------------------------------------------------------------------------
2332 ;; ---- Truncating contiguous stores
2333 ;; -------------------------------------------------------------------------
2334 ;; Includes:
2335 ;; - ST1B
2336 ;; - ST1H
2337 ;; - ST1W
2338 ;; -------------------------------------------------------------------------
2340 ;; Predicated truncate and store, with 8 elements per 128-bit block.
2341 (define_insn "@aarch64_store_trunc<VNx8_NARROW:mode><VNx8_WIDE:mode>"
2342 [(set (match_operand:VNx8_NARROW 0 "memory_operand" "+m")
2343 (unspec:VNx8_NARROW
2344 [(match_operand:VNx8BI 2 "register_operand" "Upl")
2345 (truncate:VNx8_NARROW
2346 (match_operand:VNx8_WIDE 1 "register_operand" "w"))
2347 (match_dup 0)]
2348 UNSPEC_ST1_SVE))]
2349 "TARGET_SVE"
2350 "st1<VNx8_NARROW:Vesize>\t%1.<VNx8_WIDE:Vetype>, %2, %0"
2351 )
2353 ;; Predicated truncate and store, with 4 elements per 128-bit block.
2354 (define_insn "@aarch64_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2355 [(set (match_operand:VNx4_NARROW 0 "memory_operand" "+m")
2356 (unspec:VNx4_NARROW
2357 [(match_operand:VNx4BI 2 "register_operand" "Upl")
2358 (truncate:VNx4_NARROW
2359 (match_operand:VNx4_WIDE 1 "register_operand" "w"))
2360 (match_dup 0)]
2361 UNSPEC_ST1_SVE))]
2362 "TARGET_SVE"
2363 "st1<VNx4_NARROW:Vesize>\t%1.<VNx4_WIDE:Vetype>, %2, %0"
2364 )
2366 ;; Predicated truncate and store, with 2 elements per 128-bit block.
2367 (define_insn "@aarch64_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2368 [(set (match_operand:VNx2_NARROW 0 "memory_operand" "+m")
2369 (unspec:VNx2_NARROW
2370 [(match_operand:VNx2BI 2 "register_operand" "Upl")
2371 (truncate:VNx2_NARROW
2372 (match_operand:VNx2_WIDE 1 "register_operand" "w"))
2373 (match_dup 0)]
2374 UNSPEC_ST1_SVE))]
2375 "TARGET_SVE"
2376 "st1<VNx2_NARROW:Vesize>\t%1.<VNx2_WIDE:Vetype>, %2, %0"
2377 )
2379 ;; -------------------------------------------------------------------------
2380 ;; ---- Non-temporal contiguous stores
2381 ;; -------------------------------------------------------------------------
2382 ;; Includes:
2383 ;; - STNT1B
2384 ;; - STNT1D
2385 ;; - STNT1H
2386 ;; - STNT1W
2387 ;; -------------------------------------------------------------------------
2389 (define_insn "@aarch64_stnt1<mode>"
2390 [(set (match_operand:SVE_FULL 0 "memory_operand" "+m")
2391 (unspec:SVE_FULL
2392 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2393 (match_operand:SVE_FULL 1 "register_operand" "w")
2394 (match_dup 0)]
2395 UNSPEC_STNT1_SVE))]
2396 "TARGET_SVE"
2397 "stnt1<Vesize>\t%1.<Vetype>, %2, %0"
2398 )
2400 ;; -------------------------------------------------------------------------
2401 ;; ---- Normal scatter stores
2402 ;; -------------------------------------------------------------------------
2403 ;; Includes scatter forms of:
2404 ;; - ST1D
2405 ;; - ST1W
2406 ;; -------------------------------------------------------------------------
2408 ;; Unpredicated scatter stores.
2409 (define_expand "scatter_store<mode><v_int_container>"
2410 [(set (mem:BLK (scratch))
2411 (unspec:BLK
2412 [(match_dup 5)
2413 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2414 (match_operand:<V_INT_CONTAINER> 1 "register_operand")
2415 (match_operand:DI 2 "const_int_operand")
2416 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2417 (match_operand:SVE_24 4 "register_operand")]
2418 UNSPEC_ST1_SCATTER))]
2419 "TARGET_SVE && TARGET_NON_STREAMING"
2421 operands[5] = aarch64_ptrue_reg (<VPRED>mode);
2425 ;; Predicated scatter stores for 32-bit elements. Operand 2 is true for
2426 ;; unsigned extension and false for signed extension.
2427 (define_insn "mask_scatter_store<mode><v_int_container>"
2428 [(set (mem:BLK (scratch))
2429 (unspec:BLK
2430 [(match_operand:VNx4BI 5 "register_operand")
2431 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2432 (match_operand:VNx4SI 1 "register_operand")
2433 (match_operand:DI 2 "const_int_operand")
2434 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2435 (match_operand:SVE_4 4 "register_operand")]
2436 UNSPEC_ST1_SCATTER))]
2437 "TARGET_SVE && TARGET_NON_STREAMING"
2438 {@ [ cons: 0 , 1 , 2 , 3 , 4 , 5 ]
2439 [ Z , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s]
2440 [ vgw , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%1.s, #%0]
2441 [ rk , w , Z , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2442 [ rk , w , Ui1 , Ui1 , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2443 [ rk , w , Z , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2444 [ rk , w , Ui1 , i , w , Upl ] st1<Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2445 }
2446 )
2448 ;; Predicated scatter stores for 64-bit elements. The value of operand 2
2449 ;; doesn't matter in this case.
2450 (define_insn "mask_scatter_store<mode><v_int_container>"
2451 [(set (mem:BLK (scratch))
2452 (unspec:BLK
2453 [(match_operand:VNx2BI 5 "register_operand")
2454 (match_operand:DI 0 "aarch64_sve_gather_offset_<Vesize>")
2455 (match_operand:VNx2DI 1 "register_operand")
2456 (match_operand:DI 2 "const_int_operand")
2457 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2458 (match_operand:SVE_2 4 "register_operand")]
2459 UNSPEC_ST1_SCATTER))]
2460 "TARGET_SVE && TARGET_NON_STREAMING"
2461 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2462 [ Z , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d]
2463 [ vgd , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%1.d, #%0]
2464 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d]
2465 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2466 }
2467 )
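;; An illustrative ACLE-level route (wrapper invented; index form shown):
;;   #include <arm_sve.h>
;;   void
;;   scatter (svbool_t pg, double *base, svuint64_t index, svfloat64_t data)
;;   {
;;     /* Plausibly: st1d z0.d, p0, [x0, z1.d, lsl #3].  */
;;     svst1_scatter_u64index_f64 (pg, base, index, data);
;;   }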
2469 ;; Likewise, but with the offset being extended from 32 bits.
2470 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_<su>xtw_unpacked"
2471 [(set (mem:BLK (scratch))
2472 (unspec:BLK
2473 [(match_operand:VNx2BI 5 "register_operand")
2474 (match_operand:DI 0 "register_operand")
2475 (unspec:VNx2DI
2476 [(match_operand 6)
2477 (ANY_EXTEND:VNx2DI
2478 (match_operand:VNx2SI 1 "register_operand"))]
2479 UNSPEC_PRED_X)
2480 (match_operand:DI 2 "const_int_operand")
2481 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2482 (match_operand:SVE_2 4 "register_operand")]
2483 UNSPEC_ST1_SCATTER))]
2484 "TARGET_SVE && TARGET_NON_STREAMING"
2485 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2486 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw]
2487 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, <su>xtw %p3]
2488 }
2489 "&& !CONSTANT_P (operands[6])"
2490 {
2491 operands[6] = CONSTM1_RTX (<VPRED>mode);
2492 }
2493 )
2495 ;; Likewise, but with the offset being truncated to 32 bits and then
2496 ;; sign-extended.
2497 (define_insn_and_rewrite "*mask_scatter_store<mode><v_int_container>_sxtw"
2498 [(set (mem:BLK (scratch))
2499 (unspec:BLK
2500 [(match_operand:VNx2BI 5 "register_operand")
2501 (match_operand:DI 0 "register_operand")
2502 (unspec:VNx2DI
2503 [(match_operand 6)
2504 (sign_extend:VNx2DI
2505 (truncate:VNx2SI
2506 (match_operand:VNx2DI 1 "register_operand")))]
2507 UNSPEC_PRED_X)
2508 (match_operand:DI 2 "const_int_operand")
2509 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2510 (match_operand:SVE_2 4 "register_operand")]
2511 UNSPEC_ST1_SCATTER))]
2512 "TARGET_SVE && TARGET_NON_STREAMING"
2513 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2514 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2515 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]
2516 }
2517 "&& !CONSTANT_P (operands[6])"
2518 {
2519 operands[6] = CONSTM1_RTX (<VPRED>mode);
2520 }
2521 )
2523 ;; Likewise, but with the offset being truncated to 32 bits and then
2524 ;; zero-extended.
2525 (define_insn "*mask_scatter_store<mode><v_int_container>_uxtw"
2526 [(set (mem:BLK (scratch))
2527 (unspec:BLK
2528 [(match_operand:VNx2BI 5 "register_operand")
2529 (match_operand:DI 0 "aarch64_reg_or_zero")
2530 (and:VNx2DI
2531 (match_operand:VNx2DI 1 "register_operand")
2532 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2533 (match_operand:DI 2 "const_int_operand")
2534 (match_operand:DI 3 "aarch64_gather_scale_operand_<Vesize>")
2535 (match_operand:SVE_2 4 "register_operand")]
2536 UNSPEC_ST1_SCATTER))]
2537 "TARGET_SVE && TARGET_NON_STREAMING"
2538 {@ [ cons: 0 , 1 , 3 , 4 , 5 ]
2539 [ rk , w , Ui1 , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2540 [ rk , w , i , w , Upl ] st1<Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]
2541 }
2542 )
2544 ;; -------------------------------------------------------------------------
2545 ;; ---- Truncating scatter stores
2546 ;; -------------------------------------------------------------------------
2547 ;; Includes scatter forms of:
2548 ;; - ST1B
2549 ;; - ST1H
2550 ;; - ST1W
2551 ;; -------------------------------------------------------------------------
2553 ;; Predicated truncating scatter stores for 32-bit elements. Operand 2 is
2554 ;; true for unsigned extension and false for signed extension.
2555 (define_insn "@aarch64_scatter_store_trunc<VNx4_NARROW:mode><VNx4_WIDE:mode>"
2556 [(set (mem:BLK (scratch))
2557 (unspec:BLK
2558 [(match_operand:VNx4BI 5 "register_operand")
2559 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx4_NARROW:Vesize>" "Z, vg<VNx4_NARROW:Vesize>, rk, rk, rk, rk")
2560 (match_operand:VNx4SI 1 "register_operand")
2561 (match_operand:DI 2 "const_int_operand")
2562 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx4_NARROW:Vesize>" "Ui1, Ui1, Ui1, Ui1, i, i")
2563 (truncate:VNx4_NARROW
2564 (match_operand:VNx4_WIDE 4 "register_operand"))]
2565 UNSPEC_ST1_SCATTER))]
2566 "TARGET_SVE && TARGET_NON_STREAMING"
2567 {@ [ cons: 1 , 2 , 4 , 5 ]
2568 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s]
2569 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%1.s, #%0]
2570 [ w , Z , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw]
2571 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw]
2572 [ w , Z , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, sxtw %p3]
2573 [ w , Ui1 , w , Upl ] st1<VNx4_NARROW:Vesize>\t%4.s, %5, [%0, %1.s, uxtw %p3]
2574 }
2575 )
2577 ;; Predicated truncating scatter stores for 64-bit elements. The value of
2578 ;; operand 2 doesn't matter in this case.
2579 (define_insn "@aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>"
2580 [(set (mem:BLK (scratch))
2581 (unspec:BLK
2582 [(match_operand:VNx2BI 5 "register_operand")
2583 (match_operand:DI 0 "aarch64_sve_gather_offset_<VNx2_NARROW:Vesize>" "Z, vg<VNx2_NARROW:Vesize>, rk, rk")
2584 (match_operand:VNx2DI 1 "register_operand")
2585 (match_operand:DI 2 "const_int_operand")
2586 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, Ui1, Ui1, i")
2587 (truncate:VNx2_NARROW
2588 (match_operand:VNx2_WIDE 4 "register_operand"))]
2589 UNSPEC_ST1_SCATTER))]
2590 "TARGET_SVE && TARGET_NON_STREAMING"
2591 {@ [ cons: 1 , 4 , 5 ]
2592 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d]
2593 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%1.d, #%0]
2594 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d]
2595 [ w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, lsl %p3]
2596 }
2597 )
2599 ;; Likewise, but with the offset being sign-extended from 32 bits.
2600 (define_insn_and_rewrite "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_sxtw"
2601 [(set (mem:BLK (scratch))
2602 (unspec:BLK
2603 [(match_operand:VNx2BI 5 "register_operand")
2604 (match_operand:DI 0 "register_operand")
2605 (unspec:VNx2DI
2606 [(match_operand 6)
2607 (sign_extend:VNx2DI
2608 (truncate:VNx2SI
2609 (match_operand:VNx2DI 1 "register_operand")))]
2610 UNSPEC_PRED_X)
2611 (match_operand:DI 2 "const_int_operand")
2612 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2613 (truncate:VNx2_NARROW
2614 (match_operand:VNx2_WIDE 4 "register_operand"))]
2615 UNSPEC_ST1_SCATTER))]
2616 "TARGET_SVE && TARGET_NON_STREAMING"
2617 {@ [ cons: 0 , 1 , 4 , 5 ]
2618 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw]
2619 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, sxtw %p3]
2620 }
2621 "&& !rtx_equal_p (operands[5], operands[6])"
2622 {
2623 operands[6] = copy_rtx (operands[5]);
2624 }
2625 )
2627 ;; Likewise, but with the offset being zero-extended from 32 bits.
2628 (define_insn "*aarch64_scatter_store_trunc<VNx2_NARROW:mode><VNx2_WIDE:mode>_uxtw"
2629 [(set (mem:BLK (scratch))
2630 (unspec:BLK
2631 [(match_operand:VNx2BI 5 "register_operand")
2632 (match_operand:DI 0 "aarch64_reg_or_zero")
2633 (and:VNx2DI
2634 (match_operand:VNx2DI 1 "register_operand")
2635 (match_operand:VNx2DI 6 "aarch64_sve_uxtw_immediate"))
2636 (match_operand:DI 2 "const_int_operand")
2637 (match_operand:DI 3 "aarch64_gather_scale_operand_<VNx2_NARROW:Vesize>" "Ui1, i")
2638 (truncate:VNx2_NARROW
2639 (match_operand:VNx2_WIDE 4 "register_operand"))]
2640 UNSPEC_ST1_SCATTER))]
2641 "TARGET_SVE && TARGET_NON_STREAMING"
2642 {@ [ cons: 0 , 1 , 4 , 5 ]
2643 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw]
2644 [ rk , w , w , Upl ] st1<VNx2_NARROW:Vesize>\t%4.d, %5, [%0, %1.d, uxtw %p3]
2645 }
2646 )
2648 ;; =========================================================================
2649 ;; == Vector creation
2650 ;; =========================================================================
2652 ;; -------------------------------------------------------------------------
2653 ;; ---- [INT,FP] Duplicate element
2654 ;; -------------------------------------------------------------------------
2655 ;; Includes:
2656 ;; - DUP
2657 ;; - MOV
2658 ;; - LD1RB
2659 ;; - LD1RD
2660 ;; - LD1RH
2661 ;; - LD1RW
2662 ;; - LD1ROB (F64MM)
2663 ;; - LD1ROD (F64MM)
2664 ;; - LD1ROH (F64MM)
2665 ;; - LD1ROW (F64MM)
2666 ;; - LD1RQB
2667 ;; - LD1RQD
2668 ;; - LD1RQH
2669 ;; - LD1RQW
2670 ;; -------------------------------------------------------------------------
2672 (define_expand "vec_duplicate<mode>"
2673 [(parallel
2674 [(set (match_operand:SVE_ALL 0 "register_operand")
2675 (vec_duplicate:SVE_ALL
2676 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2677 (clobber (scratch:VNx16BI))])]
2678 "TARGET_SVE"
2679 {
2680 if (MEM_P (operands[1]))
2681 {
2682 rtx ptrue = aarch64_ptrue_reg (<VPRED>mode);
2683 emit_insn (gen_sve_ld1r<mode> (operands[0], ptrue, operands[1],
2684 CONST0_RTX (<MODE>mode)));
2685 DONE;
2686 }
2687 }
2688 )
2690 ;; Accept memory operands for the benefit of combine, and also in case
2691 ;; the scalar input gets spilled to memory during RA. We want to split
2692 ;; the load at the first opportunity in order to allow the PTRUE to be
2693 ;; optimized with surrounding code.
2694 (define_insn_and_split "*vec_duplicate<mode>_reg"
2695 [(set (match_operand:SVE_ALL 0 "register_operand")
2696 (vec_duplicate:SVE_ALL
2697 (match_operand:<VEL> 1 "aarch64_sve_dup_operand")))
2698 (clobber (match_scratch:VNx16BI 2 "=X, X, Upl"))]
2699 "TARGET_SVE"
2700 {@ [ cons: =0 , 1 ; attrs: length ]
2701 [ w , r ; 4 ] mov\t%0.<Vetype>, %<vwcore>1
2702 [ w , w ; 4 ] mov\t%0.<Vetype>, %<Vetype>1
2703 [ w , Uty ; 8 ] #
2704 }
2705 "&& MEM_P (operands[1])"
2706 [(const_int 0)]
2707 {
2708 if (GET_CODE (operands[2]) == SCRATCH)
2709 operands[2] = gen_reg_rtx (VNx16BImode);
2710 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2711 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2712 emit_insn (gen_sve_ld1r<mode> (operands[0], gp, operands[1],
2713 CONST0_RTX (<MODE>mode)));
2714 DONE;
2715 }
2716 )
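;; For example (hedged; wrapper invented):
;;   #include <arm_sve.h>
;;   svint32_t
;;   splat (int32_t x)
;;   {
;;     /* Plausibly: mov z0.s, w0.  */
;;     return svdup_n_s32 (x);
;;   }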
2718 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (LE version).
2719 ;;
2720 ;; The addressing mode range of LD1RQ does not match the addressing mode
2721 ;; range of LDR Qn. If the predicate enforced the LD1RQ range, we would
2722 ;; not be able to combine LDR Qns outside that range. The predicate
2723 ;; therefore accepts all memory operands, with only the constraints
2724 ;; enforcing the actual restrictions. If the instruction is split
2725 ;; before RA, we need to load invalid addresses into a temporary.
2727 (define_insn_and_split "@aarch64_vec_duplicate_vq<mode>_le"
2728 [(set (match_operand:SVE_FULL 0 "register_operand" "=w, w")
2729 (vec_duplicate:SVE_FULL
2730 (match_operand:<V128> 1 "nonimmediate_operand" "w, UtQ")))
2731 (clobber (match_scratch:VNx16BI 2 "=X, Upl"))]
2732 "TARGET_SVE && !BYTES_BIG_ENDIAN"
2734 switch (which_alternative)
2737 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2738 return "dup\t%0.q, %1.q[0]";
2745 "&& MEM_P (operands[1])"
2748 if (can_create_pseudo_p ()
2749 && !aarch64_sve_ld1rq_operand (operands[1], <V128>mode))
2750 operands[1] = force_reload_address (operands[1]);
2751 if (GET_CODE (operands[2]) == SCRATCH)
2752 operands[2] = gen_reg_rtx (VNx16BImode);
2753 emit_move_insn (operands[2], CONSTM1_RTX (VNx16BImode));
2754 rtx gp = gen_lowpart (<VPRED>mode, operands[2]);
2755 emit_insn (gen_aarch64_sve_ld1rq<mode> (operands[0], operands[1], gp));
2756 DONE;
2757 }
2758 )
2760 ;; Duplicate an Advanced SIMD vector to fill an SVE vector (BE version).
2761 ;; The SVE register layout puts memory lane N into (architectural)
2762 ;; register lane N, whereas the Advanced SIMD layout puts the memory
2763 ;; lsb into the register lsb. We therefore have to describe this in rtl
2764 ;; terms as a reverse of the V128 vector followed by a duplicate.
2765 (define_insn "@aarch64_vec_duplicate_vq<mode>_be"
2766 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2767 (vec_duplicate:SVE_FULL
2768 (vec_select:<V128>
2769 (match_operand:<V128> 1 "register_operand" "w")
2770 (match_operand 2 "descending_int_parallel"))))]
2771 "TARGET_SVE
2772 && BYTES_BIG_ENDIAN
2773 && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
2774 GET_MODE_NUNITS (<V128>mode) - 1)"
2775 {
2776 operands[1] = gen_rtx_REG (<MODE>mode, REGNO (operands[1]));
2777 return "dup\t%0.q, %1.q[0]";
2778 }
2779 )
2781 ;; This is used for vec_duplicate<mode>s from memory, but can also
2782 ;; be used by combine to optimize selects of a vec_duplicate<mode>
2783 ;; with zero.
2784 (define_insn "sve_ld1r<mode>"
2785 [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
2786 (unspec:SVE_ALL
2787 [(match_operand:<VPRED> 1 "register_operand" "Upl")
2788 (vec_duplicate:SVE_ALL
2789 (match_operand:<VEL> 2 "aarch64_sve_ld1r_operand" "Uty"))
2790 (match_operand:SVE_ALL 3 "aarch64_simd_imm_zero")]
2791 UNSPEC_SEL))]
2792 "TARGET_SVE"
2793 "ld1r<Vesize>\t%0.<Vetype>, %1/z, %2"
2794 )
2796 ;; Load 128 bits from memory under predicate control and duplicate to
2797 ;; fill a vector.
2798 (define_insn "@aarch64_sve_ld1rq<mode>"
2799 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2800 (unspec:SVE_FULL
2801 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2802 (match_operand:<V128> 1 "aarch64_sve_ld1rq_operand" "UtQ")]
2803 UNSPEC_LD1RQ))]
2804 "TARGET_SVE"
2805 {
2806 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2807 return "ld1rq<Vesize>\t%0.<Vetype>, %2/z, %1";
2808 }
2809 )
2811 (define_insn "@aarch64_sve_ld1ro<mode>"
2812 [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
2813 (unspec:SVE_FULL
2814 [(match_operand:<VPRED> 2 "register_operand" "Upl")
2815 (match_operand:OI 1 "aarch64_sve_ld1ro_operand_<Vesize>"
2816 "UO<Vesize>")]
2817 UNSPEC_LD1RO))]
2818 "TARGET_SVE_F64MM && TARGET_NON_STREAMING"
2819 {
2820 operands[1] = gen_rtx_MEM (<VEL>mode, XEXP (operands[1], 0));
2821 return "ld1ro<Vesize>\t%0.<Vetype>, %2/z, %1";
2822 }
2823 )
2825 ;; -------------------------------------------------------------------------
2826 ;; ---- [INT,FP] Initialize from individual elements
2827 ;; -------------------------------------------------------------------------
2828 ;; Includes:
2829 ;; - INSR
2830 ;; -------------------------------------------------------------------------
2832 (define_expand "vec_init<mode><Vel>"
2833 [(match_operand:SVE_FULL 0 "register_operand")
2834 (match_operand 1 "")]
2835 "TARGET_SVE"
2836 {
2837 aarch64_sve_expand_vector_init (operands[0], operands[1]);
2838 DONE;
2839 }
2840 )
2842 ;; Vector constructor combining two half vectors { a, b }
2843 (define_expand "vec_init<mode><Vhalf>"
2844 [(match_operand:SVE_NO2E 0 "register_operand")
2845 (match_operand 1 "")]
2846 "TARGET_SVE"
2847 {
2848 aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]);
2849 DONE;
2850 }
2851 )
2853 ;; Vector constructor combining four quad vectors { a, b, c, d }
2854 (define_expand "vec_init<mode><Vquad>"
2855 [(match_operand:SVE_NO4E 0 "register_operand")
2856 (match_operand 1 "")]
2857 "TARGET_SVE"
2858 {
2859 aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]);
2860 DONE;
2861 }
2862 )
2864 ;; Vector constructor combining eight vectors { a, b, c, d, ... }
2865 (define_expand "vec_initvnx16qivnx2qi"
2866 [(match_operand:VNx16QI 0 "register_operand")
2867 (match_operand 1 "")]
2868 "TARGET_SVE"
2869 {
2870 aarch64_sve_expand_vector_init_subvector (operands[0], operands[1]);
2871 DONE;
2872 }
2873 )
2875 ;; Shift an SVE vector left and insert a scalar into element 0.
2876 (define_insn "vec_shl_insert_<mode>"
2877 [(set (match_operand:SVE_FULL 0 "register_operand")
2878 (unspec:SVE_FULL
2879 [(match_operand:SVE_FULL 1 "register_operand")
2880 (match_operand:<VEL> 2 "aarch64_reg_or_zero")]
2881 UNSPEC_INSR))]
2882 "TARGET_SVE"
2883 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
2884 [ ?w , 0 , rZ ; * ] insr\t%0.<Vetype>, %<vwcore>2
2885 [ w , 0 , w ; * ] insr\t%0.<Vetype>, %<Vetype>2
2886 [ ??&w , w , rZ ; yes ] movprfx\t%0, %1\;insr\t%0.<Vetype>, %<vwcore>2
2887 [ ?&w , w , w ; yes ] movprfx\t%0, %1\;insr\t%0.<Vetype>, %<Vetype>2
2888 }
2889 )
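;; An illustrative use from the ACLE (wrapper invented):
;;   #include <arm_sve.h>
;;   svint32_t
;;   shift_in (svint32_t vec, int32_t scalar)
;;   {
;;     /* Plausibly: insr z0.s, w0.  */
;;     return svinsr_n_s32 (vec, scalar);
;;   }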
2891 ;; -------------------------------------------------------------------------
2892 ;; ---- [INT] Linear series
2893 ;; -------------------------------------------------------------------------
2894 ;; Includes:
2895 ;; - INDEX
2896 ;; -------------------------------------------------------------------------
2898 (define_insn "vec_series<mode>"
2899 [(set (match_operand:SVE_I 0 "register_operand")
2900 (vec_series:SVE_I
2901 (match_operand:<VEL> 1 "aarch64_sve_index_operand")
2902 (match_operand:<VEL> 2 "aarch64_sve_index_operand")))]
2903 "TARGET_SVE"
2904 {@ [ cons: =0 , 1 , 2 ]
2905 [ w , Usi , r ] index\t%0.<Vctype>, #%1, %<vccore>2
2906 [ w , r , Usi ] index\t%0.<Vctype>, %<vccore>1, #%2
2907 [ w , r , r ] index\t%0.<Vctype>, %<vccore>1, %<vccore>2
2908 }
2909 )
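;; For example (hedged; wrapper invented):
;;   #include <arm_sve.h>
;;   svint32_t
;;   iota (int32_t start, int32_t step)
;;   {
;;     /* Plausibly: index z0.s, w0, w1.  */
;;     return svindex_s32 (start, step);
;;   }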
2911 ;; Optimize {x, x, x, x, ...} + {0, n, 2*n, 3*n, ...} if n is in range
2912 ;; of an INDEX instruction.
2913 (define_insn "*vec_series<mode>_plus"
2914 [(set (match_operand:SVE_I 0 "register_operand" "=w")
2915 (plus:SVE_I
2916 (vec_duplicate:SVE_I
2917 (match_operand:<VEL> 1 "register_operand" "r"))
2918 (match_operand:SVE_I 2 "immediate_operand")))]
2919 "TARGET_SVE && aarch64_check_zero_based_sve_index_immediate (operands[2])"
2921 operands[2] = aarch64_check_zero_based_sve_index_immediate (operands[2]);
2922 return "index\t%0.<Vctype>, %<vccore>1, #%2";
2926 ;; -------------------------------------------------------------------------
2927 ;; ---- [PRED] Duplicate element
2928 ;; -------------------------------------------------------------------------
2929 ;; The patterns in this section are synthetic.
2930 ;; -------------------------------------------------------------------------
2932 ;; Implement a predicate broadcast by shifting the low bit of the scalar
2933 ;; input into the top bit and using a WHILELO. An alternative would be to
2934 ;; duplicate the input and do a compare with zero.
2935 (define_expand "vec_duplicate<mode>"
2936 [(set (match_operand:PRED_ALL 0 "register_operand")
2937 (vec_duplicate:PRED_ALL (match_operand:QI 1 "register_operand")))]
2938 "TARGET_SVE"
2939 {
2940 rtx tmp = gen_reg_rtx (DImode);
2941 rtx op1 = gen_lowpart (DImode, operands[1]);
2942 emit_insn (gen_ashldi3 (tmp, op1, gen_int_mode (63, DImode)));
2943 emit_insn (gen_while_ultdi<mode> (operands[0], const0_rtx, tmp));
2944 DONE;
2945 }
2946 )
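;; Concretely: if the scalar's low bit is b, then (b << 63) is either 0 or
;; 1 << 63, and since a predicate never covers anywhere near 1 << 63
;; elements, WHILELO (0, b << 63) is all-false or all-true respectively.
;; A hedged intrinsics-level sketch of the same idea (wrapper invented):
;;   #include <arm_sve.h>
;;   #include <stdbool.h>
;;   svbool_t
;;   broadcast_flag (bool b)
;;   {
;;     return svwhilelt_b8_u64 (0, (uint64_t) b << 63);
;;   }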
2948 ;; =========================================================================
2949 ;; == Vector decomposition
2950 ;; =========================================================================
2952 ;; -------------------------------------------------------------------------
2953 ;; ---- [INT,FP] Extract index
2954 ;; -------------------------------------------------------------------------
2955 ;; Includes:
2956 ;; - DUP (Advanced SIMD)
2957 ;; - DUP (SVE)
2958 ;; - EXT (SVE)
2959 ;; - ST1 (Advanced SIMD)
2960 ;; - UMOV (Advanced SIMD)
2961 ;; -------------------------------------------------------------------------
2963 (define_expand "vec_extract<mode><Vel>"
2964 [(set (match_operand:<VEL> 0 "register_operand")
2965 (vec_select:<VEL>
2966 (match_operand:SVE_FULL 1 "register_operand")
2967 (parallel [(match_operand:SI 2 "nonmemory_operand")])))]
2968 "TARGET_SVE"
2969 {
2970 poly_int64 val;
2971 if (poly_int_rtx_p (operands[2], &val)
2972 && known_eq (val, GET_MODE_NUNITS (<MODE>mode) - 1))
2973 {
2974 /* The last element can be extracted with a LASTB and a false
2975 predicate.  */
2976 rtx sel = aarch64_pfalse_reg (<VPRED>mode);
2977 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2978 DONE;
2979 }
2980 if (!CONST_INT_P (operands[2]))
2981 {
2982 /* Create an index with operand[2] as the base and -1 as the step.
2983 It will then be zero for the element we care about. */
2984 rtx index = gen_lowpart (<VEL_INT>mode, operands[2]);
2985 index = force_reg (<VEL_INT>mode, index);
2986 rtx series = gen_reg_rtx (<V_INT_EQUIV>mode);
2987 emit_insn (gen_vec_series<v_int_equiv> (series, index, constm1_rtx));
2989 /* Get a predicate that is true for only that element. */
2990 rtx zero = CONST0_RTX (<V_INT_EQUIV>mode);
2991 rtx cmp = gen_rtx_EQ (<V_INT_EQUIV>mode, series, zero);
2992 rtx sel = gen_reg_rtx (<VPRED>mode);
2993 emit_insn (gen_vec_cmp<v_int_equiv><vpred> (sel, cmp, series, zero));
2995 /* Select the element using LASTB. */
2996 emit_insn (gen_extract_last_<mode> (operands[0], sel, operands[1]));
2997 DONE;
2998 }
2999 }
3000 )
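;; A hedged sketch of the variable-index path at the intrinsics level
;; (wrapper invented; it mirrors the INDEX/CMPEQ/LASTB sequence above):
;;   #include <arm_sve.h>
;;   int32_t
;;   extract_var (svint32_t vec, int32_t i)
;;   {
;;     svint32_t series = svindex_s32 (i, -1);  /* zero at element i */
;;     svbool_t sel = svcmpeq_n_s32 (svptrue_b32 (), series, 0);
;;     return svlastb_s32 (sel, vec);
;;   }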
3002 ;; Extract element zero. This is a special case because we want to force
3003 ;; the registers to be the same for the second alternative, and then
3004 ;; split the instruction into nothing after RA.
3005 (define_insn_and_split "*vec_extract<mode><Vel>_0"
3006 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3007 (vec_select:<VEL>
3008 (match_operand:SVE_FULL 1 "register_operand" "w, 0, w")
3009 (parallel [(const_int 0)])))]
3010 "TARGET_SVE"
3011 {
3012 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
3013 switch (which_alternative)
3014 {
3015 case 0:
3016 return "umov\\t%<vwcore>0, %1.<Vetype>[0]";
3017 case 1:
3018 return "#";
3019 case 2:
3020 return "st1\\t{%1.<Vetype>}[0], %0";
3021 default:
3022 gcc_unreachable ();
3023 }
3024 }
3025 "&& reload_completed
3026 && REG_P (operands[0])
3027 && REGNO (operands[0]) == REGNO (operands[1])"
3028 [(const_int 0)]
3029 {
3030 emit_note (NOTE_INSN_DELETED);
3031 DONE;
3032 }
3033 [(set_attr "type" "neon_to_gp_q, untyped, neon_store1_one_lane_q")]
3034 )
3036 ;; Extract an element from the Advanced SIMD portion of the register.
3037 ;; We don't just reuse the aarch64-simd.md pattern because we don't
3038 ;; want any change in lane number on big-endian targets.
3039 (define_insn "*vec_extract<mode><Vel>_v128"
3040 [(set (match_operand:<VEL> 0 "aarch64_simd_nonimmediate_operand" "=r, w, Utv")
3041 (vec_select:<VEL>
3042 (match_operand:SVE_FULL 1 "register_operand" "w, w, w")
3043 (parallel [(match_operand:SI 2 "const_int_operand")])))]
3044 "TARGET_SVE
3045 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 1, 15)"
3046 {
3047 operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
3048 switch (which_alternative)
3049 {
3050 case 0:
3051 return "umov\\t%<vwcore>0, %1.<Vetype>[%2]";
3052 case 1:
3053 return "dup\\t%<Vetype>0, %1.<Vetype>[%2]";
3054 case 2:
3055 return "st1\\t{%1.<Vetype>}[%2], %0";
3056 default:
3057 gcc_unreachable ();
3058 }
3059 }
3060 [(set_attr "type" "neon_to_gp_q, neon_dup_q, neon_store1_one_lane_q")]
3061 )
3063 ;; Extract an element in the range of DUP. This pattern allows the
3064 ;; source and destination to be different.
3065 (define_insn "*vec_extract<mode><Vel>_dup"
3066 [(set (match_operand:<VEL> 0 "register_operand" "=w")
3067 (vec_select:<VEL>
3068 (match_operand:SVE_FULL 1 "register_operand" "w")
3069 (parallel [(match_operand:SI 2 "const_int_operand")])))]
3070 "TARGET_SVE
3071 && IN_RANGE (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode), 16, 63)"
3072 {
3073 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
3074 return "dup\t%0.<Vetype>, %1.<Vetype>[%2]";
3075 }
3076 )
3078 ;; Extract an element outside the range of DUP. This pattern requires the
3079 ;; source and destination to be the same.
3080 (define_insn "*vec_extract<mode><Vel>_ext"
3081 [(set (match_operand:<VEL> 0 "register_operand" "=w, ?&w")
3082 (vec_select:<VEL>
3083 (match_operand:SVE_FULL 1 "register_operand" "0, w")
3084 (parallel [(match_operand:SI 2 "const_int_operand")])))]
3085 "TARGET_SVE && INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode) >= 64"
3087 operands[0] = gen_rtx_REG (<MODE>mode, REGNO (operands[0]));
3088 operands[2] = GEN_INT (INTVAL (operands[2]) * GET_MODE_SIZE (<VEL>mode));
3089 return (which_alternative == 0
3090 ? "ext\t%0.b, %0.b, %0.b, #%2"
3091 : "movprfx\t%0, %1\;ext\t%0.b, %0.b, %1.b, #%2");
3093 [(set_attr "movprfx" "*,yes")]
3096 ;; -------------------------------------------------------------------------
3097 ;; ---- [INT,FP] Extract active element
3098 ;; -------------------------------------------------------------------------
3099 ;; Includes:
3100 ;; - LASTA
3101 ;; - LASTB
3102 ;; -------------------------------------------------------------------------
3104 ;; Extract the last active element of operand 1 into operand 0.
3105 ;; If no elements are active, extract the last inactive element instead.
3106 (define_insn "@extract_<last_op>_<mode>"
3107 [(set (match_operand:<VEL> 0 "register_operand")
3108 (unspec:<VEL>
3109 [(match_operand:<VPRED> 1 "register_operand")
3110 (match_operand:SVE_FULL 2 "register_operand")]
3111 LAST))]
3112 "TARGET_SVE"
3113 {@ [ cons: =0 , 1 , 2 ]
3114 [ ?r , Upl , w ] last<ab>\t%<vwcore>0, %1, %2.<Vetype>
3115 [ w , Upl , w ] last<ab>\t%<Vetype>0, %1, %2.<Vetype>
3116 }
3117 )
3119 ;; -------------------------------------------------------------------------
3120 ;; ---- [PRED] Extract index
3121 ;; -------------------------------------------------------------------------
3122 ;; The patterns in this section are synthetic.
3123 ;; -------------------------------------------------------------------------
3125 ;; Handle extractions from a predicate by converting to an integer vector
3126 ;; and extracting from there.
3127 (define_expand "vec_extract<vpred><Vel>"
3128 [(match_operand:<VEL> 0 "register_operand")
3129 (match_operand:<VPRED> 1 "register_operand")
3130 (match_operand:SI 2 "nonmemory_operand")
3131 ;; Dummy operand to which we can attach the iterator.
3132 (reg:SVE_FULL_I V0_REGNUM)]
3133 "TARGET_SVE"
3134 {
3135 rtx tmp = gen_reg_rtx (<MODE>mode);
3136 emit_insn (gen_vcond_mask_<mode><vpred> (tmp, operands[1],
3137 CONST1_RTX (<MODE>mode),
3138 CONST0_RTX (<MODE>mode)));
3139 emit_insn (gen_vec_extract<mode><Vel> (operands[0], tmp, operands[2]));
3140 DONE;
3141 }
3142 )
3144 ;; =========================================================================
3145 ;; == Unary arithmetic
3146 ;; =========================================================================
3148 ;; -------------------------------------------------------------------------
3149 ;; ---- [INT] General unary arithmetic corresponding to rtx codes
3150 ;; -------------------------------------------------------------------------
3151 ;; Includes:
3152 ;; - ABS
3153 ;; - CLS (= clrsb)
3154 ;; - CLZ
3155 ;; - CNT (= popcount)
3156 ;; - RBIT (= bitreverse)
3157 ;; - NEG
3158 ;; - NOT
3159 ;; -------------------------------------------------------------------------
3161 (define_expand "ctz<mode>2"
3162 [(set (match_operand:SVE_I 0 "register_operand")
3163 (unspec:SVE_I
3164 [(match_dup 2)
3165 (ctz:SVE_I
3166 (match_operand:SVE_I 1 "register_operand"))]
3167 UNSPEC_PRED_X))]
3168 "TARGET_SVE"
3169 {
3170 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
3171 rtx temp = gen_reg_rtx (<MODE>mode);
3172 emit_insn (gen_aarch64_pred_rbit<mode> (temp, pred, operands[1]));
3173 emit_insn (gen_aarch64_pred_clz<mode> (operands[0], pred, temp));
3174 DONE;
3175 }
3176 )
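;; The identity used is ctz (x) == clz (rbit (x)).  A hedged
;; intrinsics-level sketch of the same computation (wrapper invented):
;;   #include <arm_sve.h>
;;   svuint32_t
;;   vec_ctz (svbool_t pg, svuint32_t x)
;;   {
;;     return svclz_u32_x (pg, svrbit_u32_x (pg, x));
;;   }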
3178 ;; Unpredicated integer unary arithmetic.
3179 (define_expand "<optab><mode>2"
3180 [(set (match_operand:SVE_I 0 "register_operand")
3181 (unspec:SVE_I
3182 [(match_dup 2)
3183 (SVE_INT_UNARY:SVE_I
3184 (match_operand:SVE_I 1 "register_operand"))]
3185 UNSPEC_PRED_X))]
3186 "TARGET_SVE"
3187 {
3188 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3189 }
3190 )
3192 ;; Integer unary arithmetic predicated with a PTRUE.
3193 (define_insn "@aarch64_pred_<optab><mode>"
3194 [(set (match_operand:SVE_VDQ_I 0 "register_operand")
3195 (unspec:SVE_VDQ_I
3196 [(match_operand:<VPRED> 1 "register_operand")
3197 (SVE_INT_UNARY:SVE_VDQ_I
3198 (match_operand:SVE_VDQ_I 2 "register_operand"))]
3199 UNSPEC_PRED_X))]
3200 "TARGET_SVE"
3201 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3202 [ w , Upl , 0 ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
3203 [ ?&w , Upl , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z2.<Vetype>
3204 }
3205 )
3207 ;; Predicated integer unary arithmetic with merging.
3208 (define_expand "@cond_<optab><mode>"
3209 [(set (match_operand:SVE_I 0 "register_operand")
3210 (unspec:SVE_I
3211 [(match_operand:<VPRED> 1 "register_operand")
3212 (SVE_INT_UNARY:SVE_I
3213 (match_operand:SVE_I 2 "register_operand"))
3214 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
3215 UNSPEC_SEL))]
3216 "TARGET_SVE"
3217 )
3219 ;; Predicated integer unary arithmetic, merging with the first input.
3220 (define_insn "*cond_<optab><mode>_2"
3221 [(set (match_operand:SVE_I 0 "register_operand")
3222 (unspec:SVE_I
3223 [(match_operand:<VPRED> 1 "register_operand")
3224 (SVE_INT_UNARY:SVE_I
3225 (match_operand:SVE_I 2 "register_operand"))
3226 (match_dup 2)]
3227 UNSPEC_SEL))]
3228 "TARGET_SVE"
3229 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3230 [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3231 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3232 }
3233 )
3235 ;; Predicated integer unary arithmetic, merging with an independent value.
3236 ;;
3237 ;; The earlyclobber isn't needed for the first alternative, but omitting
3238 ;; it would only help the case in which operands 2 and 3 are the same,
3239 ;; which is handled above rather than here. Marking all the alternatives
3240 ;; as earlyclobber helps to make the instruction more regular to the
3241 ;; register allocator.
3242 (define_insn "*cond_<optab><mode>_any"
3243 [(set (match_operand:SVE_I 0 "register_operand")
3245 [(match_operand:<VPRED> 1 "register_operand")
3246 (SVE_INT_UNARY:SVE_I
3247 (match_operand:SVE_I 2 "register_operand"))
3248 (match_operand:SVE_I 3 "aarch64_simd_reg_or_zero")]
3250 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3251 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3252 [ &w , Upl , w , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3253 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3254 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3259 ;; -------------------------------------------------------------------------
3260 ;; ---- [INT] General unary arithmetic corresponding to unspecs
3261 ;; -------------------------------------------------------------------------
3266 ;; -------------------------------------------------------------------------
3268 ;; Predicated integer unary operations.
3269 (define_insn "@aarch64_pred_<optab><mode>"
3270 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3272 [(match_operand:<VPRED> 1 "register_operand")
3274 [(match_operand:SVE_FULL_I 2 "register_operand")]
3277 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3278 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3279 [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3280 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3284 ;; Another way of expressing the REVB, REVH and REVW patterns, with this
3285 ;; form being easier for permutes. The predicate mode determines the number
3286 ;; of lanes and the data mode decides the granularity of the reversal within
3287 ;; each lane.
3288 (define_insn "@aarch64_sve_revbhw_<SVE_ALL:mode><PRED_HSD:mode>"
3289 [(set (match_operand:SVE_ALL 0 "register_operand")
3291 [(match_operand:PRED_HSD 1 "register_operand")
3293 [(match_operand:SVE_ALL 2 "register_operand")]
3296 "TARGET_SVE && <PRED_HSD:elem_bits> > <SVE_ALL:container_bits>"
3297 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3298 [ w , Upl , 0 ; * ] rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3299 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;rev<SVE_ALL:Vcwtype>\t%0.<PRED_HSD:Vetype>, %1/m, %2.<PRED_HSD:Vetype>
3303 ;; Predicated integer unary operations with merging.
3304 (define_insn "@cond_<optab><mode>"
3305 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3307 [(match_operand:<VPRED> 1 "register_operand")
3309 [(match_operand:SVE_FULL_I 2 "register_operand")]
3311 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3313 "TARGET_SVE && <elem_bits> >= <min_elem_bits>"
3314 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3315 [ w , Upl , w , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3316 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3317 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3321 ;; -------------------------------------------------------------------------
3322 ;; ---- [INT] Sign and zero extension
3323 ;; -------------------------------------------------------------------------
3331 ;; -------------------------------------------------------------------------
3333 ;; Unpredicated sign and zero extension from a narrower mode.
3334 (define_expand "<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3335 [(set (match_operand:SVE_HSDI 0 "register_operand")
3338 (ANY_EXTEND:SVE_HSDI
3339 (match_operand:SVE_PARTIAL_I 1 "register_operand"))]
3341 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3343 operands[2] = aarch64_ptrue_reg (<SVE_HSDI:VPRED>mode);
3347 ;; Predicated sign and zero extension from a narrower mode.
3348 (define_insn "*<optab><SVE_PARTIAL_I:mode><SVE_HSDI:mode>2"
3349 [(set (match_operand:SVE_HSDI 0 "register_operand")
3351 [(match_operand:<SVE_HSDI:VPRED> 1 "register_operand")
3352 (ANY_EXTEND:SVE_HSDI
3353 (match_operand:SVE_PARTIAL_I 2 "register_operand"))]
3355 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3356 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3357 [ w , Upl , 0 ; * ] <su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
3358 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<su>xt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_HSDI:Vetype>, %1/m, %2.<SVE_HSDI:Vetype>
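;;
;; For example, zero-extending VNx4QI to VNx4SI uses the .s form of UXTB:
;;
;;    uxtb  z0.s, p0/m, z1.s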
3362 ;; Predicated truncate-and-sign-extend operations.
3363 (define_insn "@aarch64_pred_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3364 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
3365 (unspec:SVE_FULL_HSDI
3366 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
3367 (sign_extend:SVE_FULL_HSDI
3368 (truncate:SVE_PARTIAL_I
3369 (match_operand:SVE_FULL_HSDI 2 "register_operand")))]
3372 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3373 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3374 [ w , Upl , 0 ; * ] sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3375 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3379 ;; Predicated truncate-and-sign-extend operations with merging.
3380 (define_insn "@aarch64_cond_sxt<SVE_FULL_HSDI:mode><SVE_PARTIAL_I:mode>"
3381 [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
3382 (unspec:SVE_FULL_HSDI
3383 [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
3384 (sign_extend:SVE_FULL_HSDI
3385 (truncate:SVE_PARTIAL_I
3386 (match_operand:SVE_FULL_HSDI 2 "register_operand")))
3387 (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
3390 && (~<SVE_FULL_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3391 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3392 [ w , Upl , w , 0 ; * ] sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3393 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3394 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;sxt<SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
3398 ;; Predicated truncate-and-zero-extend operations, merging with the
3399 ;; first input.
3401 ;; The canonical form of this operation is an AND of a constant rather
3402 ;; than (zero_extend (truncate ...)).
3403 (define_insn "*cond_uxt<mode>_2"
3404 [(set (match_operand:SVE_I 0 "register_operand")
3406 [(match_operand:<VPRED> 1 "register_operand")
3408 (match_operand:SVE_I 2 "register_operand")
3409 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3413 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3414 [ w , Upl , 0 ; * ] uxt%e3\t%0.<Vetype>, %1/m, %0.<Vetype>
3415 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3419 ;; Predicated truncate-and-zero-extend operations, merging with an
3420 ;; independent value.
3422 ;; The earlyclobber isn't needed for the first alternative, but omitting
3423 ;; it would only help the case in which operands 2 and 4 are the same,
3424 ;; which is handled above rather than here. Marking all the alternatives
3425 ;; as earlyclobber helps to make the instruction more regular to the
3426 ;; register allocator.
3427 (define_insn "*cond_uxt<mode>_any"
3428 [(set (match_operand:SVE_I 0 "register_operand")
3430 [(match_operand:<VPRED> 1 "register_operand")
3432 (match_operand:SVE_I 2 "register_operand")
3433 (match_operand:SVE_I 3 "aarch64_sve_uxt_immediate"))
3434 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3436 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
3437 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
3438 [ &w , Upl , w , 0 ; * ] uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3439 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3440 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %4\;uxt%e3\t%0.<Vetype>, %1/m, %2.<Vetype>
3444 ;; -------------------------------------------------------------------------
3445 ;; ---- [INT] Truncation
3446 ;; -------------------------------------------------------------------------
3447 ;; The patterns in this section are synthetic.
3448 ;; -------------------------------------------------------------------------
3450 ;; Truncate to a partial SVE vector from either a full vector or a
3451 ;; wider partial vector. This is a no-op, because we can just ignore
3452 ;; the unused upper bits of the source.
3453 (define_insn_and_split "trunc<SVE_HSDI:mode><SVE_PARTIAL_I:mode>2"
3454 [(set (match_operand:SVE_PARTIAL_I 0 "register_operand" "=w")
3455 (truncate:SVE_PARTIAL_I
3456 (match_operand:SVE_HSDI 1 "register_operand" "w")))]
3457 "TARGET_SVE && (~<SVE_HSDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
3459 "&& reload_completed"
3460 [(set (match_dup 0) (match_dup 1))]
3462 operands[1] = aarch64_replace_reg_mode (operands[1],
3463 <SVE_PARTIAL_I:MODE>mode);
3467 ;; -------------------------------------------------------------------------
3468 ;; ---- [INT] Logical inverse
3469 ;; -------------------------------------------------------------------------
3472 ;; -------------------------------------------------------------------------
3474 ;; Logical inverse, predicated with a ptrue.
3475 (define_expand "@aarch64_ptrue_cnot<mode>"
3476 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3479 [(match_operand:<VPRED> 1 "register_operand")
3480 (const_int SVE_KNOWN_PTRUE)
3482 (match_operand:SVE_FULL_I 2 "register_operand")
3490 operands[3] = CONST0_RTX (<MODE>mode);
3491 operands[4] = CONST1_RTX (<MODE>mode);
3495 (define_insn "*cnot<mode>"
3496 [(set (match_operand:SVE_I 0 "register_operand")
3499 [(match_operand:<VPRED> 1 "register_operand")
3500 (const_int SVE_KNOWN_PTRUE)
3502 (match_operand:SVE_I 2 "register_operand")
3503 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3505 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3509 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3510 [ w , Upl , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3511 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
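;;
;; CNOT sets each active element to 1 if it was zero and to 0 otherwise,
;; so a loop such as
;;
;;    for (int i = 0; i < n; ++i)
;;      dst[i] = (src[i] == 0);
;;
;; can vectorize to a single CNOT per vector, e.g. cnot z0.s, p0/m, z1.s.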
3515 ;; Predicated logical inverse with merging.
3516 (define_expand "@cond_cnot<mode>"
3517 [(set (match_operand:SVE_FULL_I 0 "register_operand")
3519 [(match_operand:<VPRED> 1 "register_operand")
3523 (const_int SVE_KNOWN_PTRUE)
3525 (match_operand:SVE_FULL_I 2 "register_operand")
3531 (match_operand:SVE_FULL_I 3 "aarch64_simd_reg_or_zero")]
3535 operands[4] = CONSTM1_RTX (<VPRED>mode);
3536 operands[5] = CONST0_RTX (<MODE>mode);
3537 operands[6] = CONST1_RTX (<MODE>mode);
3541 ;; Predicated logical inverse, merging with the first input.
3542 (define_insn_and_rewrite "*cond_cnot<mode>_2"
3543 [(set (match_operand:SVE_I 0 "register_operand")
3545 [(match_operand:<VPRED> 1 "register_operand")
3546 ;; Logical inverse of operand 2 (as above).
3550 (const_int SVE_KNOWN_PTRUE)
3552 (match_operand:SVE_I 2 "register_operand")
3553 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3555 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3561 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3562 [ w , Upl , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %0.<Vetype>
3563 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3565 "&& !CONSTANT_P (operands[5])"
3567 operands[5] = CONSTM1_RTX (<VPRED>mode);
3571 ;; Predicated logical inverse, merging with an independent value.
3573 ;; The earlyclobber isn't needed for the first alternative, but omitting
3574 ;; it would only help the case in which operands 2 and 6 are the same,
3575 ;; which is handled above rather than here. Marking all the alternatives
3576 ;; as earlyclobber helps to make the instruction more regular to the
3577 ;; register allocator.
3578 (define_insn_and_rewrite "*cond_cnot<mode>_any"
3579 [(set (match_operand:SVE_I 0 "register_operand")
3581 [(match_operand:<VPRED> 1 "register_operand")
3582 ;; Logical inverse of operand 2 (as above).
3586 (const_int SVE_KNOWN_PTRUE)
3588 (match_operand:SVE_I 2 "register_operand")
3589 (match_operand:SVE_I 3 "aarch64_simd_imm_zero"))]
3591 (match_operand:SVE_I 4 "aarch64_simd_imm_one")
3594 (match_operand:SVE_I 6 "aarch64_simd_reg_or_zero")]
3596 "TARGET_SVE && !rtx_equal_p (operands[2], operands[6])"
3597 {@ [ cons: =0 , 1 , 2 , 6 ; attrs: movprfx ]
3598 [ &w , Upl , w , 0 ; * ] cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3599 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3600 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %6\;cnot\t%0.<Vetype>, %1/m, %2.<Vetype>
3602 "&& !CONSTANT_P (operands[5])"
3604 operands[5] = CONSTM1_RTX (<VPRED>mode);
3608 ;; -------------------------------------------------------------------------
3609 ;; ---- [FP<-INT] General unary arithmetic that maps to unspecs
3610 ;; -------------------------------------------------------------------------
3613 ;; -------------------------------------------------------------------------
3615 ;; Unpredicated unary operations that take an integer and return a float.
3616 (define_insn "@aarch64_sve_<optab><mode>"
3617 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3619 [(match_operand:<V_INT_EQUIV> 1 "register_operand" "w")]
3622 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3625 ;; -------------------------------------------------------------------------
3626 ;; ---- [FP] General unary arithmetic corresponding to unspecs
3627 ;; -------------------------------------------------------------------------
3642 ;; -------------------------------------------------------------------------
3644 ;; Unpredicated floating-point unary operations.
3645 (define_insn "@aarch64_sve_<optab><mode>"
3646 [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
3648 [(match_operand:SVE_FULL_F 1 "register_operand" "w")]
3651 "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>"
3654 ;; Unpredicated floating-point unary operations.
3655 (define_expand "<optab><mode>2"
3656 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3659 (const_int SVE_RELAXED_GP)
3660 (match_operand:SVE_FULL_F 1 "register_operand")]
3661 SVE_COND_FP_UNARY_OPTAB))]
3664 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
3668 ;; Predicated floating-point unary operations.
3669 (define_insn "@aarch64_pred_<optab><mode>"
3670 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3672 [(match_operand:<VPRED> 1 "register_operand")
3673 (match_operand:SI 3 "aarch64_sve_gp_strictness")
3674 (match_operand:SVE_FULL_F 2 "register_operand")]
3675 SVE_COND_FP_UNARY))]
3677 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3678 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3679 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3683 ;; Predicated floating-point unary arithmetic with merging.
3684 (define_expand "@cond_<optab><mode>"
3685 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3687 [(match_operand:<VPRED> 1 "register_operand")
3690 (const_int SVE_STRICT_GP)
3691 (match_operand:SVE_FULL_F 2 "register_operand")]
3693 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3698 ;; Predicated floating-point unary arithmetic, merging with the first input.
3699 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
3700 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3702 [(match_operand:<VPRED> 1 "register_operand")
3705 (const_int SVE_RELAXED_GP)
3706 (match_operand:SVE_FULL_F 2 "register_operand")]
3711 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3712 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3713 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3715 "&& !rtx_equal_p (operands[1], operands[3])"
3717 operands[3] = copy_rtx (operands[1]);
3721 (define_insn "*cond_<optab><mode>_2_strict"
3722 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3724 [(match_operand:<VPRED> 1 "register_operand")
3727 (const_int SVE_STRICT_GP)
3728 (match_operand:SVE_FULL_F 2 "register_operand")]
3733 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
3734 [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>
3735 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3739 ;; Predicated floating-point unary arithmetic, merging with an independent
3740 ;; value.
3742 ;; The earlyclobber isn't needed for the first alternative, but omitting
3743 ;; it would only help the case in which operands 2 and 3 are the same,
3744 ;; which is handled above rather than here. Marking all the alternatives
3745 ;; as earlyclobber helps to make the instruction more regular to the
3746 ;; register allocator.
3747 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
3748 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3750 [(match_operand:<VPRED> 1 "register_operand")
3753 (const_int SVE_RELAXED_GP)
3754 (match_operand:SVE_FULL_F 2 "register_operand")]
3756 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3758 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3759 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3760 [ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3761 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3762 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3764 "&& !rtx_equal_p (operands[1], operands[4])"
3766 operands[4] = copy_rtx (operands[1]);
3770 (define_insn "*cond_<optab><mode>_any_strict"
3771 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3773 [(match_operand:<VPRED> 1 "register_operand")
3776 (const_int SVE_STRICT_GP)
3777 (match_operand:SVE_FULL_F 2 "register_operand")]
3779 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
3781 "TARGET_SVE && !rtx_equal_p (operands[2], operands[3])"
3782 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3783 [ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3784 [ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3785 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
3789 ;; -------------------------------------------------------------------------
3790 ;; ---- [FP] Square root
3791 ;; -------------------------------------------------------------------------
3793 (define_expand "sqrt<mode>2"
3794 [(set (match_operand:SVE_FULL_F 0 "register_operand")
3797 (const_int SVE_RELAXED_GP)
3798 (match_operand:SVE_FULL_F 1 "register_operand")]
3799 UNSPEC_COND_FSQRT))]
3802 if (aarch64_emit_approx_sqrt (operands[0], operands[1], false))
3804 operands[2] = aarch64_ptrue_reg (<VPRED>mode);
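;;
;; For example, at default precision a loop such as
;;
;;    for (int i = 0; i < n; ++i)
;;      dst[i] = __builtin_sqrtf (src[i]);
;;
;; vectorizes to FSQRT, whereas with -mlow-precision-sqrt
;; aarch64_emit_approx_sqrt can instead emit an FRSQRTE estimate refined
;; by FRSQRTS Newton-Raphson steps.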
3807 ;; -------------------------------------------------------------------------
3808 ;; ---- [FP] Reciprocal square root
3809 ;; -------------------------------------------------------------------------
3811 (define_expand "rsqrt<mode>2"
3812 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3813 (unspec:SVE_FULL_SDF
3814 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3818 aarch64_emit_approx_sqrt (operands[0], operands[1], true);
3822 (define_expand "@aarch64_rsqrte<mode>"
3823 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3824 (unspec:SVE_FULL_SDF
3825 [(match_operand:SVE_FULL_SDF 1 "register_operand")]
3830 (define_expand "@aarch64_rsqrts<mode>"
3831 [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
3832 (unspec:SVE_FULL_SDF
3833 [(match_operand:SVE_FULL_SDF 1 "register_operand")
3834 (match_operand:SVE_FULL_SDF 2 "register_operand")]
3839 ;; -------------------------------------------------------------------------
3840 ;; ---- [PRED] Inverse
3841 ;; -------------------------------------------------------------------------
3844 ;; -------------------------------------------------------------------------
3846 ;; Unpredicated predicate inverse.
3847 (define_expand "one_cmpl<mode>2"
3848 [(set (match_operand:PRED_ALL 0 "register_operand")
3850 (not:PRED_ALL (match_operand:PRED_ALL 1 "register_operand"))
3854 operands[2] = aarch64_ptrue_reg (<MODE>mode);
3858 ;; Predicated predicate inverse.
3859 (define_insn "*one_cmpl<mode>3"
3860 [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
3862 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand" "Upa"))
3863 (match_operand:PRED_ALL 1 "register_operand" "Upa")))]
3865 "not\t%0.b, %1/z, %2.b"
3868 ;; =========================================================================
3869 ;; == Binary arithmetic
3870 ;; =========================================================================
3872 ;; -------------------------------------------------------------------------
3873 ;; ---- [INT] General binary arithmetic corresponding to rtx codes
3874 ;; -------------------------------------------------------------------------
3876 ;; - ADD (merging form only)
3877 ;; - AND (merging form only)
3878 ;; - ASR (merging form only)
3879 ;; - EOR (merging form only)
3880 ;; - LSL (merging form only)
3881 ;; - LSR (merging form only)
3883 ;; - ORR (merging form only)
3886 ;; - SQADD (SVE2 merging form only)
3887 ;; - SQSUB (SVE2 merging form only)
3888 ;; - SUB (merging form only)
3891 ;; - UQADD (SVE2 merging form only)
3892 ;; - UQSUB (SVE2 merging form only)
3893 ;; -------------------------------------------------------------------------
3895 ;; Unpredicated integer binary operations that have an immediate form.
3896 (define_expand "<optab><mode>3"
3897 [(set (match_operand:SVE_I 0 "register_operand")
3900 (SVE_INT_BINARY_MULTI:SVE_I
3901 (match_operand:SVE_I 1 "register_operand")
3902 (match_operand:SVE_I 2 "aarch64_sve_<sve_imm_con>_operand"))]
3906 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3910 ;; Unpredicated integer binary operations that have an immediate form.
3911 ;; Advanced SIMD does not support vector DImode MUL, but SVE does.
3912 ;; Make use of the overlap between Z and V registers to implement the V2DI
3913 ;; optab for TARGET_SVE. The mulvnx2di3 expander can
3914 ;; handle the TARGET_SVE2 case transparently.
3915 (define_expand "mul<mode>3"
3916 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
3917 (unspec:SVE_I_SIMD_DI
3920 (match_operand:SVE_I_SIMD_DI 1 "register_operand")
3921 (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand"))]
3925 /* SVE2 supports the MUL (vectors, unpredicated) form. Emit the simple
3926 pattern for it here rather than splitting off the MULT expander
3927 separately.  */
3928 if (TARGET_SVE2)
3929 {
3930 emit_move_insn (operands[0], gen_rtx_MULT (<MODE>mode,
3931 operands[1], operands[2]));
3934 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
3938 ;; Integer binary operations that have an immediate form, predicated
3939 ;; with a PTRUE. We don't actually need the predicate for the first
3940 ;; and third alternatives, but using Upa or X isn't likely to gain much
3941 ;; and would make the instruction seem less uniform to the register
3942 ;; allocator.
3943 (define_insn_and_split "@aarch64_pred_<optab><mode>"
3944 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
3945 (unspec:SVE_I_SIMD_DI
3946 [(match_operand:<VPRED> 1 "register_operand")
3947 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
3948 (match_operand:SVE_I_SIMD_DI 2 "register_operand")
3949 (match_operand:SVE_I_SIMD_DI 3 "aarch64_sve_<sve_imm_con>_operand"))]
3952 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
3953 [ w , Upl , %0 , <sve_imm_con> ; * ] #
3954 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
3955 [ ?&w , Upl , w , <sve_imm_con> ; yes ] #
3956 [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
3958 ; Split the unpredicated form after reload, so that we don't have
3959 ; the unnecessary PTRUE.
3960 "&& reload_completed
3961 && !register_operand (operands[3], <MODE>mode)"
3963 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI (match_dup 2) (match_dup 3)))]
3967 ;; Unpredicated binary operations with a constant (post-RA only).
3968 ;; These are generated by splitting a predicated instruction whose
3969 ;; predicate is unused.
3970 (define_insn "*post_ra_<optab><mode>3"
3971 [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand" "=w, ?&w")
3972 (SVE_INT_BINARY_IMM:SVE_I_SIMD_DI
3973 (match_operand:SVE_I_SIMD_DI 1 "register_operand" "0, w")
3974 (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_<sve_imm_con>_immediate")))]
3975 "TARGET_SVE && reload_completed"
3977 <sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2
3978 movprfx\t%Z0, %Z1\;<sve_int_op>\t%Z0.<Vetype>, %Z0.<Vetype>, #%<sve_imm_prefix>2"
3979 [(set_attr "movprfx" "*,yes")]
3982 ;; Predicated integer operations with merging.
3983 (define_expand "@cond_<optab><mode>"
3984 [(set (match_operand:SVE_I 0 "register_operand")
3986 [(match_operand:<VPRED> 1 "register_operand")
3987 (SVE_INT_BINARY:SVE_I
3988 (match_operand:SVE_I 2 "register_operand")
3989 (match_operand:SVE_I 3 "<sve_pred_int_rhs2_operand>"))
3990 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
3995 ;; Predicated integer operations, merging with the first input.
3996 (define_insn "*cond_<optab><mode>_2"
3997 [(set (match_operand:SVE_I 0 "register_operand")
3999 [(match_operand:<VPRED> 1 "register_operand")
4000 (SVE_INT_BINARY:SVE_I
4001 (match_operand:SVE_I 2 "register_operand")
4002 (match_operand:SVE_I 3 "register_operand"))
4006 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4007 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4008 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4012 ;; Predicated integer operations, merging with the second input.
4013 (define_insn "*cond_<optab><mode>_3"
4014 [(set (match_operand:SVE_I 0 "register_operand")
4016 [(match_operand:<VPRED> 1 "register_operand")
4017 (SVE_INT_BINARY:SVE_I
4018 (match_operand:SVE_I 2 "register_operand")
4019 (match_operand:SVE_I 3 "register_operand"))
4023 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4024 [ w , Upl , w , 0 ; * ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4025 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4029 ;; Predicated integer operations, merging with an independent value.
4030 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4031 [(set (match_operand:SVE_I 0 "register_operand")
4033 [(match_operand:<VPRED> 1 "register_operand")
4034 (SVE_INT_BINARY:SVE_I
4035 (match_operand:SVE_I 2 "register_operand")
4036 (match_operand:SVE_I 3 "register_operand"))
4037 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4040 && !rtx_equal_p (operands[2], operands[4])
4041 && !rtx_equal_p (operands[3], operands[4])"
4042 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4043 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4044 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4045 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4046 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4047 [ ?&w , Upl , w , w , w ] #
4049 "&& reload_completed
4050 && register_operand (operands[4], <MODE>mode)
4051 && !rtx_equal_p (operands[0], operands[4])"
4053 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4054 operands[4], operands[1]));
4055 operands[4] = operands[2] = operands[0];
4057 [(set_attr "movprfx" "yes")]
4060 ;; -------------------------------------------------------------------------
4061 ;; ---- [INT] Addition
4062 ;; -------------------------------------------------------------------------
4074 ;; -------------------------------------------------------------------------
4076 (define_insn "add<mode>3"
4077 [(set (match_operand:SVE_I 0 "register_operand")
4079 (match_operand:SVE_I 1 "register_operand")
4080 (match_operand:SVE_I 2 "aarch64_sve_add_operand")))]
4082 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4083 [ w , %0 , vsa ; * ] add\t%0.<Vetype>, %0.<Vetype>, #%D2
4084 [ w , 0 , vsn ; * ] sub\t%0.<Vetype>, %0.<Vetype>, #%N2
4085 [ w , 0 , vsi ; * ] << aarch64_output_sve_vector_inc_dec ("%0.<Vetype>", operands[2]);
4086 [ ?w , w , vsa ; yes ] movprfx\t%0, %1\;add\t%0.<Vetype>, %0.<Vetype>, #%D2
4087 [ ?w , w , vsn ; yes ] movprfx\t%0, %1\;sub\t%0.<Vetype>, %0.<Vetype>, #%N2
4088 [ w , w , w ; * ] add\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
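;;
;; The vsi alternative handles VL-dependent immediates, so adding the
;; number of 32-bit lanes in a vector becomes, for example:
;;
;;    incw  z0.s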
4092 ;; Merging forms are handled through SVE_INT_BINARY.
4094 ;; -------------------------------------------------------------------------
4095 ;; ---- [INT] Subtraction
4096 ;; -------------------------------------------------------------------------
4100 ;; -------------------------------------------------------------------------
4102 (define_insn "sub<mode>3"
4103 [(set (match_operand:SVE_I 0 "register_operand")
4105 (match_operand:SVE_I 1 "aarch64_sve_arith_operand")
4106 (match_operand:SVE_I 2 "register_operand")))]
4108 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4109 [ w , w , w ; * ] sub\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4110 [ w , vsa , 0 ; * ] subr\t%0.<Vetype>, %0.<Vetype>, #%D1
4111 [ ?&w , vsa , w ; yes ] movprfx\t%0, %2\;subr\t%0.<Vetype>, %0.<Vetype>, #%D1
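;;
;; The SUBR form computes "immediate - vector" rather than
;; "vector - immediate", e.g.:
;;
;;    subr  z0.s, z0.s, #16    // z0 = 16 - z0 in each lane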
4115 ;; Merging forms are handled through SVE_INT_BINARY.
4117 ;; -------------------------------------------------------------------------
4118 ;; ---- [INT] Take address
4119 ;; -------------------------------------------------------------------------
4122 ;; -------------------------------------------------------------------------
4124 ;; An unshifted and unscaled ADR. This is functionally equivalent to an ADD,
4125 ;; but the svadrb intrinsics should preserve the user's choice.
4126 (define_insn "@aarch64_adr<mode>"
4127 [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
4128 (unspec:SVE_FULL_SDI
4129 [(match_operand:SVE_FULL_SDI 1 "register_operand" "w")
4130 (match_operand:SVE_FULL_SDI 2 "register_operand" "w")]
4132 "TARGET_SVE && TARGET_NON_STREAMING"
4133 "adr\t%0.<Vetype>, [%1.<Vetype>, %2.<Vetype>]"
4136 ;; Same, but with the offset being sign-extended from the low 32 bits.
4137 (define_insn_and_rewrite "*aarch64_adr_sxtw"
4138 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4140 [(match_operand:VNx2DI 1 "register_operand" "w")
4145 (match_operand:VNx2DI 2 "register_operand" "w")))]
4148 "TARGET_SVE && TARGET_NON_STREAMING"
4149 "adr\t%0.d, [%1.d, %2.d, sxtw]"
4150 "&& !CONSTANT_P (operands[3])"
4152 operands[3] = CONSTM1_RTX (VNx2BImode);
4156 ;; Same, but with the offset being zero-extended from the low 32 bits.
4157 (define_insn "*aarch64_adr_uxtw_unspec"
4158 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4160 [(match_operand:VNx2DI 1 "register_operand" "w")
4162 (match_operand:VNx2DI 2 "register_operand" "w")
4163 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))]
4165 "TARGET_SVE && TARGET_NON_STREAMING"
4166 "adr\t%0.d, [%1.d, %2.d, uxtw]"
4169 ;; Same, matching as a PLUS rather than unspec.
4170 (define_insn "*aarch64_adr_uxtw_and"
4171 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4174 (match_operand:VNx2DI 2 "register_operand" "w")
4175 (match_operand:VNx2DI 3 "aarch64_sve_uxtw_immediate"))
4176 (match_operand:VNx2DI 1 "register_operand" "w")))]
4177 "TARGET_SVE && TARGET_NON_STREAMING"
4178 "adr\t%0.d, [%1.d, %2.d, uxtw]"
4181 ;; ADR with a nonzero shift.
4182 (define_expand "@aarch64_adr<mode>_shift"
4183 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4185 (unspec:SVE_FULL_SDI
4187 (ashift:SVE_FULL_SDI
4188 (match_operand:SVE_FULL_SDI 2 "register_operand")
4189 (match_operand:SVE_FULL_SDI 3 "const_1_to_3_operand"))]
4191 (match_operand:SVE_FULL_SDI 1 "register_operand")))]
4192 "TARGET_SVE && TARGET_NON_STREAMING"
4194 operands[4] = CONSTM1_RTX (<VPRED>mode);
4198 (define_insn_and_rewrite "*aarch64_adr<mode>_shift"
4199 [(set (match_operand:SVE_24I 0 "register_operand" "=w")
4204 (match_operand:SVE_24I 2 "register_operand" "w")
4205 (match_operand:SVE_24I 3 "const_1_to_3_operand"))]
4207 (match_operand:SVE_24I 1 "register_operand" "w")))]
4208 "TARGET_SVE && TARGET_NON_STREAMING"
4209 "adr\t%0.<Vctype>, [%1.<Vctype>, %2.<Vctype>, lsl %3]"
4210 "&& !CONSTANT_P (operands[4])"
4212 operands[4] = CONSTM1_RTX (<VPRED>mode);
4216 ;; Same, but with the index being sign-extended from the low 32 bits.
4217 (define_insn_and_rewrite "*aarch64_adr_shift_sxtw"
4218 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4227 (match_operand:VNx2DI 2 "register_operand" "w")))]
4229 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
4231 (match_operand:VNx2DI 1 "register_operand" "w")))]
4232 "TARGET_SVE && TARGET_NON_STREAMING"
4233 "adr\t%0.d, [%1.d, %2.d, sxtw %3]"
4234 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4236 operands[5] = operands[4] = CONSTM1_RTX (VNx2BImode);
4240 ;; Same, but with the index being zero-extended from the low 32 bits.
4241 (define_insn_and_rewrite "*aarch64_adr_shift_uxtw"
4242 [(set (match_operand:VNx2DI 0 "register_operand" "=w")
4248 (match_operand:VNx2DI 2 "register_operand" "w")
4249 (match_operand:VNx2DI 4 "aarch64_sve_uxtw_immediate"))
4250 (match_operand:VNx2DI 3 "const_1_to_3_operand"))]
4252 (match_operand:VNx2DI 1 "register_operand" "w")))]
4253 "TARGET_SVE && TARGET_NON_STREAMING"
4254 "adr\t%0.d, [%1.d, %2.d, uxtw %3]"
4255 "&& !CONSTANT_P (operands[5])"
4257 operands[5] = CONSTM1_RTX (VNx2BImode);
4261 ;; -------------------------------------------------------------------------
4262 ;; ---- [INT] Absolute difference
4263 ;; -------------------------------------------------------------------------
4267 ;; -------------------------------------------------------------------------
4269 ;; Unpredicated integer absolute difference.
4270 (define_expand "<su>abd<mode>3"
4271 [(use (match_operand:SVE_I 0 "register_operand"))
4273 (match_operand:SVE_I 1 "register_operand")
4274 (match_operand:SVE_I 2 "register_operand"))]
4277 rtx pred = aarch64_ptrue_reg (<VPRED>mode);
4278 emit_insn (gen_aarch64_pred_<su>abd<mode> (operands[0], pred, operands[1],
4284 ;; Predicated integer absolute difference.
4285 (define_insn "@aarch64_pred_<su>abd<mode>"
4286 [(set (match_operand:SVE_I 0 "register_operand")
4289 [(match_operand:<VPRED> 1 "register_operand")
4291 (match_operand:SVE_I 2 "register_operand")
4292 (match_operand:SVE_I 3 "register_operand"))]
4301 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4302 [ w , Upl , %0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4303 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
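;;
;; The classic absolute-difference idiom
;;
;;    for (int i = 0; i < n; ++i)
;;      dst[i] = a[i] > b[i] ? a[i] - b[i] : b[i] - a[i];
;;
;; can vectorize to a single SABD (or UABD for unsigned elements).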
4307 (define_expand "@aarch64_cond_<su>abd<mode>"
4308 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4310 [(match_operand:<VPRED> 1 "register_operand")
4315 (match_operand:SVE_FULL_I 2 "register_operand")
4316 (match_operand:SVE_FULL_I 3 "register_operand"))]
4320 (<max_opp>:SVE_FULL_I
4324 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4328 if (rtx_equal_p (operands[3], operands[4]))
4329 std::swap (operands[2], operands[3]);
4332 ;; Predicated integer absolute difference, merging with the first input.
4333 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_2"
4334 [(set (match_operand:SVE_I 0 "register_operand")
4336 [(match_operand:<VPRED> 1 "register_operand")
4341 (match_operand:SVE_I 2 "register_operand")
4342 (match_operand:SVE_I 3 "register_operand"))]
4353 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4354 [ w , Upl , 0 , w ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4355 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4357 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4359 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4363 ;; Predicated integer absolute difference, merging with the second input.
4364 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_3"
4365 [(set (match_operand:SVE_I 0 "register_operand")
4367 [(match_operand:<VPRED> 1 "register_operand")
4372 (match_operand:SVE_I 2 "register_operand")
4373 (match_operand:SVE_I 3 "register_operand"))]
4384 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4385 [ w , Upl , w , 0 ; * ] <su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4386 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4388 "&& (!CONSTANT_P (operands[4]) || !CONSTANT_P (operands[5]))"
4390 operands[4] = operands[5] = CONSTM1_RTX (<VPRED>mode);
4394 ;; Predicated integer absolute difference, merging with an independent value.
4395 (define_insn_and_rewrite "*aarch64_cond_<su>abd<mode>_any"
4396 [(set (match_operand:SVE_I 0 "register_operand")
4398 [(match_operand:<VPRED> 1 "register_operand")
4403 (match_operand:SVE_I 2 "register_operand")
4404 (match_operand:SVE_I 3 "register_operand"))]
4412 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4415 && !rtx_equal_p (operands[2], operands[4])
4416 && !rtx_equal_p (operands[3], operands[4])"
4417 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4418 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4419 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4420 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4421 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<su>abd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4422 [ ?&w , Upl , w , w , w ] #
4426 if (!CONSTANT_P (operands[5]) || !CONSTANT_P (operands[6]))
4427 operands[5] = operands[6] = CONSTM1_RTX (<VPRED>mode);
4428 else if (reload_completed
4429 && register_operand (operands[4], <MODE>mode)
4430 && !rtx_equal_p (operands[0], operands[4]))
4432 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4433 operands[4], operands[1]));
4434 operands[4] = operands[2] = operands[0];
4439 [(set_attr "movprfx" "yes")]
4442 ;; -------------------------------------------------------------------------
4443 ;; ---- [INT] Saturating addition and subtraction
4444 ;; -------------------------------------------------------------------------
4449 ;; -------------------------------------------------------------------------
4451 ;; Unpredicated saturating signed addition and subtraction.
4452 (define_insn "<su_optab>s<addsub><mode>3"
4453 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4454 (SBINQOPS:SVE_FULL_I
4455 (match_operand:SVE_FULL_I 1 "register_operand")
4456 (match_operand:SVE_FULL_I 2 "aarch64_sve_sqadd_operand")))]
4458 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4459 [ w , 0 , vsQ ; * ] <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4460 [ w , 0 , vsS ; * ] <binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4461 [ ?&w , w , vsQ ; yes ] movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4462 [ ?&w , w , vsS ; yes ] movprfx\t%0, %1\;<binqops_op_rev>\t%0.<Vetype>, %0.<Vetype>, #%N2
4463 [ w , w , w ; * ] <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4467 ;; Unpredicated saturating unsigned addition and subtraction.
4468 (define_insn "<su_optab>s<addsub><mode>3"
4469 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4470 (UBINQOPS:SVE_FULL_I
4471 (match_operand:SVE_FULL_I 1 "register_operand")
4472 (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_operand")))]
4474 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4475 [ w , 0 , vsa ; * ] <binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4476 [ ?&w , w , vsa ; yes ] movprfx\t%0, %1\;<binqops_op>\t%0.<Vetype>, %0.<Vetype>, #%D2
4477 [ w , w , w ; * ] <binqops_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>
4481 ;; -------------------------------------------------------------------------
4482 ;; ---- [INT] Highpart multiplication
4483 ;; -------------------------------------------------------------------------
4487 ;; -------------------------------------------------------------------------
4489 ;; Unpredicated highpart multiplication.
4490 (define_expand "<su>mul<mode>3_highpart"
4491 [(set (match_operand:SVE_I 0 "register_operand")
4495 [(match_operand:SVE_I 1 "register_operand")
4496 (match_operand:SVE_I 2 "register_operand")]
4501 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4505 ;; Predicated highpart multiplication.
4506 (define_insn "@aarch64_pred_<optab><mode>"
4507 [(set (match_operand:SVE_I 0 "register_operand")
4509 [(match_operand:<VPRED> 1 "register_operand")
4511 [(match_operand:SVE_I 2 "register_operand")
4512 (match_operand:SVE_I 3 "register_operand")]
4516 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4517 [ w , Upl , %0 , w ; * ] <su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4518 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<su>mulh\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
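;;
;; Highpart multiplication is what the midend uses, for example, when
;; strength-reducing division by an invariant, and corresponds to
;; source-level arithmetic such as:
;;
;;    (int32_t) (((int64_t) a[i] * (int64_t) b[i]) >> 32)
;;
;; which maps to smulh z0.s, p0/m, z0.s, z1.s.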
4522 ;; Predicated highpart multiplications with merging.
4523 (define_expand "@cond_<optab><mode>"
4524 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4526 [(match_operand:<VPRED> 1 "register_operand")
4528 [(match_operand:SVE_FULL_I 2 "register_operand")
4529 (match_operand:SVE_FULL_I 3 "register_operand")]
4531 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4535 /* Only target code is aware of these operations, so we don't need
4536 to handle the fully-general case. */
4537 gcc_assert (rtx_equal_p (operands[2], operands[4])
4538 || CONSTANT_P (operands[4]));
4541 ;; Predicated highpart multiplications, merging with the first input.
4542 (define_insn "*cond_<optab><mode>_2"
4543 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4545 [(match_operand:<VPRED> 1 "register_operand")
4547 [(match_operand:SVE_FULL_I 2 "register_operand")
4548 (match_operand:SVE_FULL_I 3 "register_operand")]
4553 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4554 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4555 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4559 ;; Predicated highpart multiplications, merging with zero.
4560 (define_insn "*cond_<optab><mode>_z"
4561 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4563 [(match_operand:<VPRED> 1 "register_operand")
4565 [(match_operand:SVE_FULL_I 2 "register_operand")
4566 (match_operand:SVE_FULL_I 3 "register_operand")]
4568 (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
4571 {@ [ cons: =0 , 1 , 2 , 3 ]
4572 [ &w , Upl , %0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4573 [ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4575 [(set_attr "movprfx" "yes")])
4577 ;; -------------------------------------------------------------------------
4578 ;; ---- [INT] Division
4579 ;; -------------------------------------------------------------------------
4585 ;; -------------------------------------------------------------------------
4587 ;; Unpredicated integer division.
4588 ;; SVE has vector integer divisions, unlike Advanced SIMD.
4589 ;; We can use it with Advanced SIMD modes to expose the V2DI and V4SI
4590 ;; optabs to the midend.
4591 (define_expand "<optab><mode>3"
4592 [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
4593 (unspec:SVE_FULL_SDI_SIMD
4595 (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
4596 (match_operand:SVE_FULL_SDI_SIMD 1 "register_operand")
4597 (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand"))]
4601 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4605 ;; Integer division predicated with a PTRUE.
4606 (define_insn "@aarch64_pred_<optab><mode>"
4607 [(set (match_operand:SVE_FULL_SDI_SIMD 0 "register_operand")
4608 (unspec:SVE_FULL_SDI_SIMD
4609 [(match_operand:<VPRED> 1 "register_operand")
4610 (SVE_INT_BINARY_SD:SVE_FULL_SDI_SIMD
4611 (match_operand:SVE_FULL_SDI_SIMD 2 "register_operand")
4612 (match_operand:SVE_FULL_SDI_SIMD 3 "register_operand"))]
4615 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4616 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
4617 [ w , Upl , w , 0 ; * ] <sve_int_op>r\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z2.<Vetype>
4618 [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_int_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
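;;
;; The reversed form handles the output being tied to the divisor:
;; SDIVR computes Zdn = Zm / Zdn, so for example
;;
;;    sdivr  z0.s, p0/m, z0.s, z1.s    // z0 = z1 / z0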
4622 ;; Predicated integer division with merging.
4623 (define_expand "@cond_<optab><mode>"
4624 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4625 (unspec:SVE_FULL_SDI
4626 [(match_operand:<VPRED> 1 "register_operand")
4627 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4628 (match_operand:SVE_FULL_SDI 2 "register_operand")
4629 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4630 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4635 ;; Predicated integer division, merging with the first input.
4636 (define_insn "*cond_<optab><mode>_2"
4637 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4638 (unspec:SVE_FULL_SDI
4639 [(match_operand:<VPRED> 1 "register_operand")
4640 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4641 (match_operand:SVE_FULL_SDI 2 "register_operand")
4642 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4646 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4647 [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4648 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4652 ;; Predicated integer division, merging with the second input.
4653 (define_insn "*cond_<optab><mode>_3"
4654 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4655 (unspec:SVE_FULL_SDI
4656 [(match_operand:<VPRED> 1 "register_operand")
4657 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4658 (match_operand:SVE_FULL_SDI 2 "register_operand")
4659 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4663 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4664 [ w , Upl , w , 0 ; * ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4665 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4669 ;; Predicated integer division, merging with an independent value.
4670 (define_insn_and_rewrite "*cond_<optab><mode>_any"
4671 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
4672 (unspec:SVE_FULL_SDI
4673 [(match_operand:<VPRED> 1 "register_operand")
4674 (SVE_INT_BINARY_SD:SVE_FULL_SDI
4675 (match_operand:SVE_FULL_SDI 2 "register_operand")
4676 (match_operand:SVE_FULL_SDI 3 "register_operand"))
4677 (match_operand:SVE_FULL_SDI 4 "aarch64_simd_reg_or_zero")]
4680 && !rtx_equal_p (operands[2], operands[4])
4681 && !rtx_equal_p (operands[3], operands[4])"
4682 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4683 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4684 [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
4685 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4686 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4687 [ ?&w , Upl , w , w , w ] #
4689 "&& reload_completed
4690 && register_operand (operands[4], <MODE>mode)
4691 && !rtx_equal_p (operands[0], operands[4])"
4693 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4694 operands[4], operands[1]));
4695 operands[4] = operands[2] = operands[0];
4697 [(set_attr "movprfx" "yes")]
4700 ;; -------------------------------------------------------------------------
4701 ;; ---- [INT] Binary logical operations
4702 ;; -------------------------------------------------------------------------
4707 ;; -------------------------------------------------------------------------
4709 ;; Unpredicated integer binary logical operations.
4710 (define_insn "<optab><mode>3"
4711 [(set (match_operand:SVE_I 0 "register_operand")
4713 (match_operand:SVE_I 1 "register_operand")
4714 (match_operand:SVE_I 2 "aarch64_sve_logical_operand")))]
4716 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4717 [ w , %0 , vsl ; * ] <logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4718 [ ?w , w , vsl ; yes ] movprfx\t%0, %1\;<logical>\t%0.<Vetype>, %0.<Vetype>, #%C2
4719 [ w , w , w ; * ] <logical>\t%0.d, %1.d, %2.d
4723 ;; Merging forms are handled through SVE_INT_BINARY.
4725 ;; -------------------------------------------------------------------------
4726 ;; ---- [INT] Binary logical operations (inverted second input)
4727 ;; -------------------------------------------------------------------------
4730 ;; -------------------------------------------------------------------------
4732 ;; Unpredicated BIC; andn named pattern.
4733 (define_expand "andn<mode>3"
4734 [(set (match_operand:SVE_I 0 "register_operand")
4738 (not:SVE_I (match_operand:SVE_I 2 "register_operand"))]
4740 (match_operand:SVE_I 1 "register_operand")))]
4743 operands[3] = CONSTM1_RTX (<VPRED>mode);
4748 (define_insn_and_rewrite "*bic<mode>3"
4749 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4754 (match_operand:SVE_I 2 "register_operand" "w"))]
4756 (match_operand:SVE_I 1 "register_operand" "w")))]
4758 "bic\t%0.d, %1.d, %2.d"
4759 "&& !CONSTANT_P (operands[3])"
4761 operands[3] = CONSTM1_RTX (<VPRED>mode);
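;;
;; BIC computes "%1 AND NOT %2"; like the other bitwise operations it is
;; element-size-agnostic, hence the fixed .d suffix:
;;
;;    bic  z0.d, z1.d, z2.d    // z0 = z1 & ~z2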
4765 ;; Predicated BIC with merging.
4766 (define_expand "@cond_bic<mode>"
4767 [(set (match_operand:SVE_FULL_I 0 "register_operand")
4769 [(match_operand:<VPRED> 1 "register_operand")
4771 (not:SVE_FULL_I (match_operand:SVE_FULL_I 3 "register_operand"))
4772 (match_operand:SVE_FULL_I 2 "register_operand"))
4773 (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
4778 ;; Predicated integer BIC, merging with the first input.
4779 (define_insn "*cond_bic<mode>_2"
4780 [(set (match_operand:SVE_I 0 "register_operand")
4782 [(match_operand:<VPRED> 1 "register_operand")
4785 (match_operand:SVE_I 3 "register_operand"))
4786 (match_operand:SVE_I 2 "register_operand"))
4790 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4791 [ w , Upl , 0 , w ; * ] bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4792 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4796 ;; Predicated integer BIC, merging with an independent value.
4797 (define_insn_and_rewrite "*cond_bic<mode>_any"
4798 [(set (match_operand:SVE_I 0 "register_operand")
4800 [(match_operand:<VPRED> 1 "register_operand")
4803 (match_operand:SVE_I 3 "register_operand"))
4804 (match_operand:SVE_I 2 "register_operand"))
4805 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4807 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4808 {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
4809 [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4810 [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4811 [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;bic\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4812 [ ?&w , Upl , w , w , w ] #
4814 "&& reload_completed
4815 && register_operand (operands[4], <MODE>mode)
4816 && !rtx_equal_p (operands[0], operands[4])"
4818 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4819 operands[4], operands[1]));
4820 operands[4] = operands[2] = operands[0];
4822 [(set_attr "movprfx" "yes")]
4825 ;; -------------------------------------------------------------------------
4826 ;; ---- [INT] Shifts (rounding towards -Inf)
4827 ;; -------------------------------------------------------------------------
4835 ;; -------------------------------------------------------------------------
4837 ;; Unpredicated shift by a scalar, which expands into one of the vector
4838 ;; shifts below.
4839 (define_expand "<ASHIFT:optab><mode>3"
4840 [(set (match_operand:SVE_I 0 "register_operand")
4842 (match_operand:SVE_I 1 "register_operand")
4843 (match_operand:<VEL> 2 "general_operand")))]
4847 if (CONST_INT_P (operands[2]))
4849 amount = gen_const_vec_duplicate (<MODE>mode, operands[2]);
4850 if (!aarch64_sve_<lr>shift_operand (operands[2], <MODE>mode))
4851 amount = force_reg (<MODE>mode, amount);
4855 amount = convert_to_mode (<VEL>mode, operands[2], 0);
4856 amount = expand_vector_broadcast (<MODE>mode, amount);
4858 emit_insn (gen_v<optab><mode>3 (operands[0], operands[1], amount));
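;;
;; For example, a variable shift such as
;;
;;    for (int i = 0; i < n; ++i)
;;      dst[i] = src[i] << amount;
;;
;; broadcasts "amount" into a vector and uses the vector shift, whereas
;; an in-range constant amount is kept as an immediate operand.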
4863 ;; Unpredicated shift by a vector.
4864 (define_expand "v<optab><mode>3"
4865 [(set (match_operand:SVE_I 0 "register_operand")
4869 (match_operand:SVE_I 1 "register_operand")
4870 (match_operand:SVE_I 2 "aarch64_sve_<lr>shift_operand"))]
4874 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
4878 ;; Shift by a vector, predicated with a PTRUE. We don't actually need
4879 ;; the predicate for the first alternative, but using Upa or X isn't
4880 ;; likely to gain much and would make the instruction seem less uniform
4881 ;; to the register allocator.
4882 (define_insn_and_split "@aarch64_pred_<optab><mode>"
4883 [(set (match_operand:SVE_I 0 "register_operand")
4885 [(match_operand:<VPRED> 1 "register_operand")
4887 (match_operand:SVE_I 2 "register_operand")
4888 (match_operand:SVE_I 3 "aarch64_sve_<lr>shift_operand"))]
4891 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
4892 [ w , Upl , w , D<lr> ; * ] #
4893 [ w , Upl , 0 , w ; * ] <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4894 [ w , Upl , w , 0 ; * ] <shift>r\t%0.<Vetype>, %1/m, %3.<Vetype>, %2.<Vetype>
4895 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
4897 "&& reload_completed
4898 && !register_operand (operands[3], <MODE>mode)"
4899 [(set (match_dup 0) (ASHIFT:SVE_I (match_dup 2) (match_dup 3)))]
4903 ;; Unpredicated shift operations by a constant (post-RA only).
4904 ;; These are generated by splitting a predicated instruction whose
4905 ;; predicate is unused.
4906 (define_insn "*post_ra_v_ashl<mode>3"
4907 [(set (match_operand:SVE_I 0 "register_operand")
4909 (match_operand:SVE_I 1 "register_operand")
4910 (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
4911 "TARGET_SVE && reload_completed"
4912 {@ [ cons: =0 , 1 , 2 ]
4913 [ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
4914 [ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
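;;
;; Note the vs1 special case: a left shift by 1 is output as
;;
;;    add  z0.s, z1.s, z1.s
;;
;; since the ADD form is never worse and is cheaper on some cores.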
4918 (define_insn "*post_ra_v_<optab><mode>3"
4919 [(set (match_operand:SVE_I 0 "register_operand" "=w")
4921 (match_operand:SVE_I 1 "register_operand" "w")
4922 (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
4923 "TARGET_SVE && reload_completed"
4924 "<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
4927 ;; Predicated integer shift, merging with the first input.
4928 (define_insn "*cond_<optab><mode>_2_const"
4929 [(set (match_operand:SVE_I 0 "register_operand")
4931 [(match_operand:<VPRED> 1 "register_operand")
4933 (match_operand:SVE_I 2 "register_operand")
4934 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4938 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
4939 [ w , Upl , 0 ; * ] <shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4940 [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4944 ;; Predicated integer shift, merging with an independent value.
4945 (define_insn_and_rewrite "*cond_<optab><mode>_any_const"
4946 [(set (match_operand:SVE_I 0 "register_operand")
4948 [(match_operand:<VPRED> 1 "register_operand")
4950 (match_operand:SVE_I 2 "register_operand")
4951 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm"))
4952 (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
4954 "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
4955 {@ [ cons: =0 , 1 , 2 , 4 ]
4956 [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4957 [ &w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<shift>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
4958 [ ?&w , Upl , w , w ] #
4960 "&& reload_completed
4961 && register_operand (operands[4], <MODE>mode)
4962 && !rtx_equal_p (operands[0], operands[4])"
4964 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
4965 operands[4], operands[1]));
4966 operands[4] = operands[2] = operands[0];
4968 [(set_attr "movprfx" "yes")]
;; Unpredicated shifts of narrow elements by 64-bit amounts.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
        (unspec:SVE_FULL_BHSI
          [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")
           (match_operand:VNx2DI 2 "register_operand" "w")]
          SVE_SHIFT_WIDE))]
  "TARGET_SVE"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.d"
)

;; Merging predicated shifts of narrow elements by 64-bit amounts.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
        (unspec:SVE_FULL_BHSI
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_BHSI
             [(match_operand:SVE_FULL_BHSI 2 "register_operand")
              (match_operand:VNx2DI 3 "register_operand")]
             SVE_SHIFT_WIDE)
           (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
;; Predicated shifts of narrow elements by 64-bit amounts, merging with
;; the first input.
(define_insn "*cond_<sve_int_op><mode>_m"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
        (unspec:SVE_FULL_BHSI
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_BHSI
             [(match_operand:SVE_FULL_BHSI 2 "register_operand")
              (match_operand:VNx2DI 3 "register_operand")]
             SVE_SHIFT_WIDE)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
  }
)

;; Predicated shifts of narrow elements by 64-bit amounts, merging with zero.
(define_insn "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
        (unspec:SVE_FULL_BHSI
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_BHSI
             [(match_operand:SVE_FULL_BHSI 2 "register_operand")
              (match_operand:VNx2DI 3 "register_operand")]
             SVE_SHIFT_WIDE)
           (match_operand:SVE_FULL_BHSI 4 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ]
     [ &w , Upl , 0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
     [ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.d
  }
  [(set_attr "movprfx" "yes")])
;; -------------------------------------------------------------------------
;; ---- [INT] Shifts (rounding towards 0)
;; -------------------------------------------------------------------------
;; Includes:
;; - ASRD
;; - SQSHLU (SVE2)
;; - SRSHR (SVE2)
;; - URSHR (SVE2)
;; -------------------------------------------------------------------------
;; Unpredicated ASRD.
(define_expand "sdiv_pow2<mode>3"
  [(set (match_operand:SVE_VDQ_I 0 "register_operand")
        (unspec:SVE_VDQ_I
          [(match_dup 3)
           (unspec:SVE_VDQ_I
             [(match_operand:SVE_VDQ_I 1 "register_operand")
              (match_operand 2 "aarch64_simd_rshift_imm")]
             UNSPEC_ASRD)]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode, <MODE>mode);
  }
)
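
;; ASRD rounds towards zero, matching C semantics for signed division by
;; a power of 2; a plain arithmetic shift would round towards -Inf
;; instead (e.g. -1 >> 3 is -1, whereas -1 / 8 is 0).  A sketch in C
;; (function name illustrative only):
;;
;;   void
;;   div8 (int *restrict x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       x[i] /= 8;	/* e.g. asrd z0.s, p0/m, z0.s, #3  */
;;   }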
;; Predicated ASRD.
(define_insn "*sdiv_pow2<mode>3"
  [(set (match_operand:SVE_VDQ_I 0 "register_operand")
        (unspec:SVE_VDQ_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_VDQ_I
             [(match_operand:SVE_VDQ_I 2 "register_operand")
              (match_operand:SVE_VDQ_I 3 "aarch64_simd_rshift_imm")]
             UNSPEC_ASRD)]
          UNSPEC_PRED_X))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , Upl , 0 ; * ] asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
     [ ?&w , Upl , w ; yes ] movprfx\t%Z0, %Z2\;asrd\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, #%3
  }
)

;; Predicated shift with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_I
             [(match_dup 5)
              (unspec:SVE_I
                [(match_operand:SVE_I 2 "register_operand")
                 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
                SVE_INT_SHIFT_IMM)]
             UNSPEC_PRED_X)
           (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    operands[5] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
;; Predicated shift, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_I
             [(match_operand 4)
              (unspec:SVE_I
                [(match_operand:SVE_I 2 "register_operand")
                 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
                SVE_INT_SHIFT_IMM)]
             UNSPEC_PRED_X)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Predicated shift, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_I 0 "register_operand")
        (unspec:SVE_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_I
             [(match_operand 5)
              (unspec:SVE_I
                [(match_operand:SVE_I 2 "register_operand")
                 (match_operand:SVE_I 3 "aarch64_simd_<lr>shift_imm")]
                SVE_INT_SHIFT_IMM)]
             UNSPEC_PRED_X)
           (match_operand:SVE_I 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 4 ]
     [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ &w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?&w , Upl , w , w ] #
  }
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP<-INT] General binary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - FSCALE
;; - FTSMUL
;; - FTSSEL
;; -------------------------------------------------------------------------

(define_expand "ldexp<mode>3"
  [(set (match_operand:GPF_HF 0 "register_operand")
        (unspec:GPF_HF
          [(match_dup 3)
           (const_int SVE_STRICT_GP)
           (match_operand:GPF_HF 1 "register_operand")
           (match_operand:<V_INT_EQUIV> 2 "register_operand")]
          UNSPEC_COND_FSCALE))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode,
                                     GET_MODE_UNIT_SIZE (<MODE>mode));
  }
)
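
;; FSCALE computes op1 * 2^op2 with the exponent taken from an integer
;; register, which is exactly ldexp.  A sketch in C of a scalar use that
;; this expander can implement (the operation runs on the SVE registers
;; that overlap the scalar operands):
;;
;;   double
;;   scale (double x, long e)
;;   {
;;     return __builtin_ldexp (x, e);	/* e.g. fscale z0.d, p0/m, z0.d, z1.d  */
;;   }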
;; Unpredicated floating-point binary operations that take an integer as
;; their second operand.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
        (unspec:SVE_FULL_F
          [(match_operand:SVE_FULL_F 1 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
          SVE_FP_BINARY_INT))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Predicated floating-point binary operations that take an integer
;; as their second operand.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F_SCALAR 0 "register_operand")
        (unspec:SVE_FULL_F_SCALAR
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F_SCALAR 2 "register_operand")
           (match_operand:<V_INT_EQUIV> 3 "register_operand")]
          SVE_COND_FP_BINARY_INT))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%Z0, %Z2\;<sve_fp_op>\t%Z0.<Vetype>, %1/m, %Z0.<Vetype>, %Z3.<Vetype>
  }
)
;; Predicated floating-point binary operations with merging, taking an
;; integer as their second operand.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:<V_INT_EQUIV> 3 "register_operand")]
             SVE_COND_FP_BINARY_INT)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)
;; Predicated floating-point binary operations that take an integer as their
;; second operand, with inactive lanes coming from the first operand.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:<V_INT_EQUIV> 3 "register_operand")]
             SVE_COND_FP_BINARY_INT)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:<V_INT_EQUIV> 3 "register_operand")]
             SVE_COND_FP_BINARY_INT)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)
;; Predicated floating-point binary operations that take an integer as
;; their second operand, with the values of inactive lanes being distinct
;; from the other inputs.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:<V_INT_EQUIV> 3 "register_operand")]
             SVE_COND_FP_BINARY_INT)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
     [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w , w ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:<V_INT_EQUIV> 3 "register_operand")]
             SVE_COND_FP_BINARY_INT)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
     [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w , w ] #
  }
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to rtx codes
;; -------------------------------------------------------------------------
;; Includes post-RA forms of:
;; - BFADD (SVE_B16B16)
;; - BFMUL (SVE_B16B16)
;; - BFSUB (SVE_B16B16)
;; - FADD
;; - FMUL
;; - FSUB
;; -------------------------------------------------------------------------
;; Split a predicated instruction whose predicate is unused into an
;; unpredicated instruction.
(define_split
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F_BF 2 "register_operand")
           (match_operand:SVE_FULL_F_BF 3 "register_operand")]
          SVE_COND_FP_BINARY))]
  "TARGET_SVE
   && reload_completed
   && INTVAL (operands[4]) == SVE_RELAXED_GP"
  [(set (match_dup 0)
        (SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF (match_dup 2) (match_dup 3)))]
)

;; Unpredicated floating-point binary operations (post-RA only).
;; These are generated by the split above.
(define_insn "*post_ra_<sve_fp_op><mode>3"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
        (SVE_UNPRED_FP_BINARY:SVE_FULL_F_BF
          (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")
          (match_operand:SVE_FULL_F_BF 2 "register_operand" "w")))]
  "TARGET_SVE && reload_completed"
  "<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>")
;; -------------------------------------------------------------------------
;; ---- [FP] General binary arithmetic corresponding to unspecs
;; -------------------------------------------------------------------------
;; Includes merging forms of:
;; - BFADD (SVE_B16B16)
;; - BFMAX (SVE_B16B16)
;; - BFMAXNM (SVE_B16B16)
;; - BFMIN (SVE_B16B16)
;; - BFMINNM (SVE_B16B16)
;; - BFMUL (SVE_B16B16)
;; - BFSUB (SVE_B16B16)
;; - FADD (constant forms handled in the "Addition" section)
;; - FDIV
;; - FDIVR
;; - FMAX
;; - FMAXNM (including #0.0 and #1.0)
;; - FMIN
;; - FMINNM (including #0.0 and #1.0)
;; - FMUL (including #0.5 and #2.0)
;; - FMULX
;; - FRECPS
;; - FRSQRTS
;; - FSUB (constant forms handled in the "Addition" section)
;; - FSUBR (constant forms handled in the "Subtraction" section)
;; -------------------------------------------------------------------------
;; Unpredicated floating-point binary operations.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
        (unspec:SVE_FULL_F
          [(match_operand:SVE_FULL_F 1 "register_operand" "w")
           (match_operand:SVE_FULL_F 2 "register_operand" "w")]
          SVE_FP_BINARY))]
  "TARGET_SVE"
  "<sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Unpredicated floating-point binary operations that need to be predicated
;; for SVE.
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_dup 3)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_FULL_F_BF 1 "<sve_pred_fp_rhs1_operand>")
           (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs2_operand>")]
          SVE_COND_FP_BINARY_OPTAB))]
  "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
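
;; The generic optab maps straight onto the predicated instruction with
;; an all-true predicate and a relaxed GP, leaving later passes free to
;; rewrite or drop the predicate.  A sketch in C (function name
;; illustrative only):
;;
;;   void
;;   vadd (float *restrict a, float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] += b[i];	/* e.g. fadd z0.s, p7/m, z0.s, z1.s  */
;;   }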
;; Predicated floating-point binary operations that have no immediate forms.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand")
           (match_operand:SVE_FULL_F 3 "register_operand")]
          SVE_COND_FP_BINARY_REG))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ w , Upl , w , 0 ; * ] <sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)

;; Predicated floating-point operations with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F_BF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F_BF 2 "<sve_pred_fp_rhs1_operand>")
              (match_operand:SVE_FULL_F_BF 3 "<sve_pred_fp_rhs2_operand>")]
             SVE_COND_FP_BINARY)
           (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
)
;; Predicated floating-point operations, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F_BF
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F_BF 2 "register_operand")
              (match_operand:SVE_FULL_F_BF 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] <b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F_BF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F_BF 2 "register_operand")
              (match_operand:SVE_FULL_F_BF 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] <b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)
;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_2_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<optab><mode>_2_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
     [ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
  }
)
;; Predicated floating-point operations, merging with the second input.
(define_insn_and_rewrite "*cond_<optab><mode>_3_relaxed"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F_BF
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F_BF 2 "register_operand")
              (match_operand:SVE_FULL_F_BF 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE && (<supports_bf16_rev> || !<is_bf16>)"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , w , 0 ; * ] <b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<optab><mode>_3_strict"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F_BF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F_BF 2 "register_operand")
              (match_operand:SVE_FULL_F_BF 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE && (<supports_bf16_rev> || !<is_bf16>)"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , w , 0 ; * ] <b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
  }
)
;; Predicated floating-point operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F_BF
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F_BF 2 "register_operand")
              (match_operand:SVE_FULL_F_BF 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && (<supports_bf16> || !<is_bf16>)
   && !rtx_equal_p (operands[2], operands[4])
   && !((<supports_bf16_rev> || !<is_bf16>)
        && rtx_equal_p (operands[3], operands[4]))"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: is_rev ]
     [ &w , Upl , 0 , w , Dz ; * ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , 0 , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ &w , Upl , w , w , Dz ; * ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w , 0 ; * ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w , w ; * ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")
   (set_attr "is_bf16" "<is_bf16>")
   (set_attr "supports_bf16_rev" "<supports_bf16_rev>")]
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
        (unspec:SVE_FULL_F_BF
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F_BF
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F_BF 2 "register_operand")
              (match_operand:SVE_FULL_F_BF 3 "register_operand")]
             SVE_COND_FP_BINARY)
           (match_operand:SVE_FULL_F_BF 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && (<supports_bf16> || !<is_bf16>)
   && !rtx_equal_p (operands[2], operands[4])
   && !((<supports_bf16_rev> || !<is_bf16>)
        && rtx_equal_p (operands[3], operands[4]))"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: is_rev ]
     [ &w , Upl , 0 , w , Dz ; * ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , 0 , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ &w , Upl , w , w , Dz ; * ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w , 0 ; * ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w , w ; * ] #
  }
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")
   (set_attr "is_bf16" "<is_bf16>")
   (set_attr "supports_bf16_rev" "<supports_bf16_rev>")]
)
;; Same for operations that take a 1-bit constant.
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 4 ]
     [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?w , Upl , w , w ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
(define_insn_and_rewrite "*cond_<optab><mode>_any_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "<sve_pred_fp_rhs2_immediate>")]
             SVE_COND_FP_BINARY_I1)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 4 ]
     [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?w , Upl , w , w ] #
  }
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] Addition
;; -------------------------------------------------------------------------
;; Includes:
;; - FADD
;; - FSUB
;; -------------------------------------------------------------------------
;; Predicated floating-point addition.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand")
           (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_operand")]
          SVE_COND_FP_ADD))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
     [ w , Upl , %0 , vsA , i ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , 0 , vsN , i ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ w , Upl , w , w , Z ; * ] #
     [ w , Upl , 0 , w , Ui1 ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , vsA , i ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?&w , Upl , w , vsN , i ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)
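
;; The vsA and vsN alternatives use FADD/FSUB's immediate forms, which
;; accept only #0.5 and #1.0; a constant in the negated range is
;; rewritten as the opposite operation on its absolute value.  A sketch
;; in C (function name illustrative only):
;;
;;   void
;;   bump (float *restrict x, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       x[i] -= 1.0f;	/* e.g. fsub z0.s, p0/m, z0.s, #1.0  */
;;   }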
;; Predicated floating-point addition of a constant, merging with the
;; first input.
(define_insn_and_rewrite "*cond_add<mode>_2_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , vsA ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , 0 , vsN ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ ?w , Upl , w , vsA ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?w , Upl , w , vsN ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_add<mode>_2_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , vsA ; * ] fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , 0 , vsN ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ ?w , Upl , w , vsA ; yes ] movprfx\t%0, %2\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?w , Upl , w , vsN ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
  }
)
;; Predicated floating-point addition of a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_add<mode>_any_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
     [ w , Upl , w , vsA , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , w , vsN , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ w , Upl , w , vsA , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , w , vsN , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ ?w , Upl , w , vsA , w ] #
     [ ?w , Upl , w , vsN , w ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
(define_insn_and_rewrite "*cond_add<mode>_any_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "aarch64_sve_float_arith_with_sub_immediate")]
             UNSPEC_COND_FADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
     [ w , Upl , w , vsA , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , w , vsN , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ w , Upl , w , vsA , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fadd\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , w , vsN , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, #%N3
     [ ?w , Upl , w , vsA , w ] #
     [ ?w , Upl , w , vsN , w ] #
  }
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)

;; Register merging forms are handled through SVE_COND_FP_BINARY.
;; -------------------------------------------------------------------------
;; ---- [FP] Complex addition
;; -------------------------------------------------------------------------
;; Includes:
;; - FCADD
;; -------------------------------------------------------------------------

;; Predicated FCADD.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand")
           (match_operand:SVE_FULL_F 3 "register_operand")]
          SVE_COND_FCADD))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
  }
)
;; Predicated FCADD with merging.
(define_expand "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             SVE_COND_FCADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
)

;; Predicated FCADD using ptrue, providing the unpredicated optab that
;; the auto-vectorizer uses for complex addition.
(define_expand "@cadd<rot><mode>3"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_dup 3)
           (const_int SVE_RELAXED_GP)
           (match_operand:SVE_FULL_F 1 "register_operand")
           (match_operand:SVE_FULL_F 2 "register_operand")]
          SVE_COND_FCADD))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
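
;; FCADD treats each even/odd element pair as a complex number
;; (real, imaginary) and adds a rotated copy of the second operand:
;; with #90 each pair computes (a.re - b.im, a.im + b.re), i.e. a + b*i,
;; and with #270 it computes (a.re + b.im, a.im - b.re), i.e. a - b*i.
;; These are the two rotations requested by the auto-vectorizer's
;; cadd<rot> optabs for complex addition.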
;; Predicated FCADD, merging with the first input.
(define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             SVE_COND_FCADD)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<optab><mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             SVE_COND_FCADD)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
  }
)
;; Predicated FCADD, merging with an independent value.
(define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             SVE_COND_FCADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
     [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ ?&w , Upl , w , w , w ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             SVE_COND_FCADD)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[2], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
     [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fcadd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>, #<rot>
     [ ?&w , Upl , w , w , w ] #
  }
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                             operands[4], operands[1]));
    operands[4] = operands[2] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] Subtraction
;; -------------------------------------------------------------------------
;; Includes:
;; - FSUB
;; - FSUBR
;; -------------------------------------------------------------------------
;; Predicated floating-point subtraction.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_operand")
           (match_operand:SVE_FULL_F 3 "register_operand")]
          SVE_COND_FP_SUB))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
     [ w , Upl , vsA , 0 , i ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
     [ w , Upl , w , w , Z ; * ] #
     [ w , Upl , 0 , w , Ui1 ; * ] fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ w , Upl , w , 0 , Ui1 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w , Upl , vsA , w , i ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
     [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fsub\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)
;; Predicated floating-point subtraction from a constant, merging with the
;; second input.
(define_insn_and_rewrite "*cond_sub<mode>_3_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
     [ ?w , Upl , w ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_sub<mode>_3_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 ; * ] fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
     [ ?w , Upl , w ; yes ] movprfx\t%0, %3\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
  }
)
;; Predicated floating-point subtraction from a constant, merging with an
;; independent value.
(define_insn_and_rewrite "*cond_sub<mode>_const_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
  {@ [ cons: =0 , 1 , 3 , 4 ]
     [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
     [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
     [ ?w , Upl , w , w ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                                 operands[4], operands[1]));
        operands[4] = operands[3] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5]))
      operands[5] = copy_rtx (operands[1]);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)

(define_insn_and_rewrite "*cond_sub<mode>_const_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "aarch64_sve_float_arith_immediate")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FSUB)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE && !rtx_equal_p (operands[3], operands[4])"
  {@ [ cons: =0 , 1 , 3 , 4 ]
     [ w , Upl , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
     [ w , Upl , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %3.<Vetype>\;fsubr\t%0.<Vetype>, %1/m, %0.<Vetype>, #%2
     [ ?w , Upl , w , w ] #
  }
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                             operands[4], operands[1]));
    operands[4] = operands[3] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
;; Register merging forms are handled through SVE_COND_FP_BINARY.

;; -------------------------------------------------------------------------
;; ---- [FP] Absolute difference
;; -------------------------------------------------------------------------
;; Includes:
;; - FABD
;; -------------------------------------------------------------------------
;; Predicated floating-point absolute difference.
(define_expand "@aarch64_pred_abd<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_dup 4)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FSUB)]
          UNSPEC_COND_FABS))]
  "TARGET_SVE"
)
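
;; The nested FSUB inside FABS lets abs (a - b) collapse into a single
;; FABD.  A sketch in C (function name illustrative only):
;;
;;   void
;;   absdiff (float *restrict a, float *restrict b, int n)
;;   {
;;     for (int i = 0; i < n; ++i)
;;       a[i] = __builtin_fabsf (a[i] - b[i]);	/* e.g. fabd z0.s, p0/m, z0.s, z1.s  */
;;   }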
;; Predicated floating-point absolute difference.
(define_insn_and_rewrite "*aarch64_pred_abd<mode>_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FSUB)]
          UNSPEC_COND_FABS))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
  "&& !rtx_equal_p (operands[1], operands[5])"
  {
    operands[5] = copy_rtx (operands[1]);
  }
)

(define_insn "*aarch64_pred_abd<mode>_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")
              (match_operand:SVE_FULL_F 3 "register_operand")]
             UNSPEC_COND_FSUB)]
          UNSPEC_COND_FABS))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , %0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)
(define_expand "@aarch64_cond_abd<mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (unspec:SVE_FULL_F
                [(match_dup 1)
                 (const_int SVE_STRICT_GP)
                 (match_operand:SVE_FULL_F 2 "register_operand")
                 (match_operand:SVE_FULL_F 3 "register_operand")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {
    if (rtx_equal_p (operands[3], operands[4]))
      std::swap (operands[2], operands[3]);
  }
)
;; Predicated floating-point absolute difference, merging with the first
;; input.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_2_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (unspec:SVE_FULL_F
                [(match_operand 5)
                 (const_int SVE_RELAXED_GP)
                 (match_operand:SVE_FULL_F 2 "register_operand")
                 (match_operand:SVE_FULL_F 3 "register_operand")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
  "&& (!rtx_equal_p (operands[1], operands[4])
       || !rtx_equal_p (operands[1], operands[5]))"
  {
    operands[4] = copy_rtx (operands[1]);
    operands[5] = copy_rtx (operands[1]);
  }
)

(define_insn "*aarch64_cond_abd<mode>_2_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_operand:SI 4 "aarch64_sve_gp_strictness")
              (unspec:SVE_FULL_F
                [(match_dup 1)
                 (match_operand:SI 5 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_FULL_F 2 "register_operand")
                 (match_operand:SVE_FULL_F 3 "register_operand")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , 0 , w ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)
;; Predicated floating-point absolute difference, merging with the second
;; input.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_3_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (unspec:SVE_FULL_F
                [(match_operand 5)
                 (const_int SVE_RELAXED_GP)
                 (match_operand:SVE_FULL_F 2 "register_operand")
                 (match_operand:SVE_FULL_F 3 "register_operand")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , w , 0 ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
  }
  "&& (!rtx_equal_p (operands[1], operands[4])
       || !rtx_equal_p (operands[1], operands[5]))"
  {
    operands[4] = copy_rtx (operands[1]);
    operands[5] = copy_rtx (operands[1]);
  }
)

(define_insn "*aarch64_cond_abd<mode>_3_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_operand:SI 4 "aarch64_sve_gp_strictness")
              (unspec:SVE_FULL_F
                [(match_dup 1)
                 (match_operand:SI 5 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_FULL_F 2 "register_operand")
                 (match_operand:SVE_FULL_F 3 "register_operand")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w , Upl , w , 0 ; * ] fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
  }
)
;; Predicated floating-point absolute difference, merging with an
;; independent value.
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (unspec:SVE_FULL_F
                [(match_operand 6)
                 (const_int SVE_RELAXED_GP)
                 (match_operand:SVE_FULL_F 2 "register_operand")
                 (match_operand:SVE_FULL_F 3 "register_operand")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
     [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w , w ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                                 operands[4], operands[1]));
        operands[4] = operands[3] = operands[0];
      }
    else if (!rtx_equal_p (operands[1], operands[5])
             || !rtx_equal_p (operands[1], operands[6]))
      {
        operands[5] = copy_rtx (operands[1]);
        operands[6] = copy_rtx (operands[1]);
      }
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
(define_insn_and_rewrite "*aarch64_cond_abd<mode>_any_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_F
             [(match_dup 1)
              (match_operand:SI 5 "aarch64_sve_gp_strictness")
              (unspec:SVE_FULL_F
                [(match_dup 1)
                 (match_operand:SI 6 "aarch64_sve_gp_strictness")
                 (match_operand:SVE_FULL_F 2 "register_operand")
                 (match_operand:SVE_FULL_F 3 "register_operand")]
                UNSPEC_COND_FSUB)]
             UNSPEC_COND_FABS)
           (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ]
     [ &w , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ &w , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w , Upl , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;fabd\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , w , w ] #
  }
  "&& reload_completed
   && register_operand (operands[4], <MODE>mode)
   && !rtx_equal_p (operands[0], operands[4])"
  {
    emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[3],
                                             operands[4], operands[1]));
    operands[4] = operands[3] = operands[0];
  }
  [(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] Multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - BFMUL (SVE_B16B16)
;; - FMUL
;; -------------------------------------------------------------------------
;; Predicated floating-point multiplication.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
        (unspec:SVE_FULL_F
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:SVE_FULL_F 2 "register_operand")
           (match_operand:SVE_FULL_F 3 "aarch64_sve_float_mul_operand")]
          SVE_COND_FP_MUL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
     [ w , Upl , %0 , vsM , i ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w , Upl , w , w , Z ; * ] #
     [ w , Upl , 0 , w , Ui1 ; * ] fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w , Upl , w , vsM , i ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?&w , Upl , w , w , Ui1 ; yes ] movprfx\t%0, %2\;fmul\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)

;; Merging forms are handled through SVE_COND_FP_BINARY and
;; SVE_COND_FP_BINARY_I1.

;; Unpredicated multiplication by selected lanes.
(define_insn "@aarch64_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_F_BF 0 "register_operand" "=w")
        (mult:SVE_FULL_F_BF
          (unspec:SVE_FULL_F_BF
            [(match_operand:SVE_FULL_F_BF 2 "register_operand" "<sve_lane_con>")
             (match_operand:SI 3 "const_int_operand")]
            UNSPEC_SVE_LANE_SELECT)
          (match_operand:SVE_FULL_F_BF 1 "register_operand" "w")))]
  "TARGET_SVE"
  "<b>fmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
;; -------------------------------------------------------------------------
;; ---- [FP] Division
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------
6526 (define_expand "div<mode>3"
6527 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6530 (const_int SVE_RELAXED_GP)
6531 (match_operand:SVE_FULL_F 1 "nonmemory_operand")
6532 (match_operand:SVE_FULL_F 2 "register_operand")]
6536 if (aarch64_emit_approx_div (operands[0], operands[1], operands[2]))
6539 operands[1] = force_reg (<MODE>mode, operands[1]);
6540 operands[3] = aarch64_ptrue_reg (<VPRED>mode);
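
;; When the approximate-division tuning flags allow it (together with the
;; fast-math guarantees they require), aarch64_emit_approx_div replaces
;; the division with an FRECPE reciprocal estimate refined by FRECPS
;; Newton-Raphson steps; otherwise the expander falls through to the
;; predicated FDIV pattern.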
6544 (define_expand "@aarch64_frecpe<mode>"
6545 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6547 [(match_operand:SVE_FULL_F 1 "register_operand")]
6552 (define_expand "@aarch64_frecps<mode>"
6553 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6555 [(match_operand:SVE_FULL_F 1 "register_operand")
6556 (match_operand:SVE_FULL_F 2 "register_operand")]
;; -------------------------------------------------------------------------
;; ---- [FP] Binary logical operations
;; -------------------------------------------------------------------------
;; Includes:
;; - AND
;; - EOR
;; - ORR
;; -------------------------------------------------------------------------

;; Binary logical operations on floating-point modes.  We avoid subregs
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_F 0 "register_operand" "=w")
        (unspec:SVE_F
          [(match_operand:SVE_F 1 "register_operand" "w")
           (match_operand:SVE_F 2 "register_operand" "w")]
          LOGICALF))]
  "TARGET_SVE"
  "<logicalf_op>\t%0.d, %1.d, %2.d"
)
;; -------------------------------------------------------------------------
;; ---- [FP] Sign copying
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------
6589 (define_expand "copysign<mode>3"
6590 [(match_operand:SVE_FULL_F 0 "register_operand")
6591 (match_operand:SVE_FULL_F 1 "register_operand")
6592 (match_operand:SVE_FULL_F 2 "nonmemory_operand")]
6595 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6596 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6597 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6598 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6600 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6601 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6604 = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6605 HOST_WIDE_INT_M1U << bits);
6607 /* copysign (x, -1) should instead be expanded as orr with the sign
6609 if (!REG_P (operands[2]))
6611 rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
6612 if (GET_CODE (op2_elt) == CONST_DOUBLE
6613 && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
6615 emit_insn (gen_ior<v_int_equiv>3 (int_res, arg1, v_sign_bitmask));
6616 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6621 operands[2] = force_reg (<MODE>mode, operands[2]);
6622 emit_insn (gen_and<v_int_equiv>3 (sign, arg2, v_sign_bitmask));
6623 emit_insn (gen_and<v_int_equiv>3
6625 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6628 emit_insn (gen_ior<v_int_equiv>3 (int_res, sign, mant));
6629 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
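
;; For 32-bit elements bits is 31, so v_sign_bitmask duplicates
;; 0x80000000 and the result is (arg2 & 0x80000000) | (arg1 & 0x7fffffff):
;; the sign of operand 2 glued onto the magnitude of operand 1.  A scalar
;; C model of the same bit manipulation (illustrative only):
;;
;;   float
;;   copysign_model (float x, float y)
;;   {
;;     unsigned int ix, iy;
;;     __builtin_memcpy (&ix, &x, sizeof ix);
;;     __builtin_memcpy (&iy, &y, sizeof iy);
;;     ix = (iy & 0x80000000u) | (ix & 0x7fffffffu);
;;     __builtin_memcpy (&x, &ix, sizeof ix);
;;     return x;
;;   }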
6634 (define_expand "cond_copysign<mode>"
6635 [(match_operand:SVE_FULL_F 0 "register_operand")
6636 (match_operand:<VPRED> 1 "register_operand")
6637 (match_operand:SVE_FULL_F 2 "register_operand")
6638 (match_operand:SVE_FULL_F 3 "nonmemory_operand")
6639 (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
6642 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6643 rtx mant = gen_reg_rtx (<V_INT_EQUIV>mode);
6644 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6645 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6647 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6648 rtx arg3 = lowpart_subreg (<V_INT_EQUIV>mode, operands[3], <MODE>mode);
6649 rtx arg4 = lowpart_subreg (<V_INT_EQUIV>mode, operands[4], <MODE>mode);
6652 = aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6653 HOST_WIDE_INT_M1U << bits);
6655 /* copysign (x, -1) should instead be expanded as orr with the sign
6657 if (!REG_P (operands[3]))
6659 rtx op2_elt = unwrap_const_vec_duplicate (operands[3]);
6660 if (GET_CODE (op2_elt) == CONST_DOUBLE
6661 && real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
6663 arg3 = force_reg (<V_INT_EQUIV>mode, v_sign_bitmask);
6664 emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], arg2,
6666 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
  operands[3] = force_reg (<MODE>mode, operands[3]);
6672 emit_insn (gen_and<v_int_equiv>3 (sign, arg3, v_sign_bitmask));
6673 emit_insn (gen_and<v_int_equiv>3
6675 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6678 emit_insn (gen_cond_ior<v_int_equiv> (int_res, operands[1], sign, mant,
  emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
  DONE;
  }
)
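
;; XORSIGN computes x * copysign (1.0, y) without a multiply, by XORing
;; the sign bit of y into x.  As a rough C-level sketch for double
;; elements (illustrative only; bits_of_x/bits_of_y stand for bitcasts
;; and are not real helpers):
;;
;;   uint64_t sign = bits_of_y & 0x8000000000000000ULL;   /* AND */
;;   result_bits = bits_of_x ^ sign;                      /* EOR */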
6685 (define_expand "xorsign<mode>3"
6686 [(match_operand:SVE_FULL_F 0 "register_operand")
6687 (match_operand:SVE_FULL_F 1 "register_operand")
6688 (match_operand:SVE_FULL_F 2 "register_operand")]
6691 rtx sign = gen_reg_rtx (<V_INT_EQUIV>mode);
6692 rtx int_res = gen_reg_rtx (<V_INT_EQUIV>mode);
6693 int bits = GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1;
6695 rtx arg1 = lowpart_subreg (<V_INT_EQUIV>mode, operands[1], <MODE>mode);
6696 rtx arg2 = lowpart_subreg (<V_INT_EQUIV>mode, operands[2], <MODE>mode);
6698 emit_insn (gen_and<v_int_equiv>3
6700 aarch64_simd_gen_const_vector_dup (<V_INT_EQUIV>mode,
6703 emit_insn (gen_xor<v_int_equiv>3 (int_res, arg1, sign));
6704 emit_move_insn (operands[0], gen_lowpart (<MODE>mode, int_res));
6709 ;; -------------------------------------------------------------------------
6710 ;; ---- [FP] Maximum and minimum
;; -------------------------------------------------------------------------
;; Includes:
;; - FMAX
;; - FMAXNM
;; - FMIN
;; - FMINNM
;; -------------------------------------------------------------------------
6719 ;; Predicated floating-point maximum/minimum.
6720 (define_insn "@aarch64_pred_<optab><mode>"
6721 [(set (match_operand:SVE_FULL_F 0 "register_operand")
6723 [(match_operand:<VPRED> 1 "register_operand")
6724 (match_operand:SI 4 "aarch64_sve_gp_strictness")
6725 (match_operand:SVE_FULL_F 2 "register_operand")
6726 (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
6727 SVE_COND_FP_MAXMIN))]
6729 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
6730 [ w , Upl , %0 , vsB ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6731 [ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6732 [ ?&w , Upl , w , vsB ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
6733 [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
6737 ;; Merging forms are handled through SVE_COND_FP_BINARY and
6738 ;; SVE_COND_FP_BINARY_I1.
6740 ;; -------------------------------------------------------------------------
6741 ;; ---- [PRED] Binary logical operations
;; -------------------------------------------------------------------------
;; Includes:
;; - AND
;; - ANDS
;; - EOR
;; - EORS
;; - ORR
;; - ORRS
;; -------------------------------------------------------------------------
6752 ;; Predicate AND. We can reuse one of the inputs as the GP.
6753 ;; Doubling the second operand is the preferred implementation
6754 ;; of the MOV alias, so we use that instead of %1/z, %1, %2.
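;; (That is, "and p0.b, p1/z, p2.b, p2.b" is the preferred form of
;; "mov p0.b, p1/z, p2.b".)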
6755 (define_insn "and<mode>3"
6756 [(set (match_operand:PRED_ALL 0 "register_operand")
6757 (and:PRED_ALL (match_operand:PRED_ALL 1 "register_operand")
6758 (match_operand:PRED_ALL 2 "register_operand")))]
6760 {@ [ cons: =0, 1 , 2 ; attrs: pred_clobber ]
6761 [ &Upa , Upa , Upa ; yes ] and\t%0.b, %1/z, %2.b, %2.b
6762 [ ?Upa , 0Upa, 0Upa; yes ] ^
6763 [ Upa , Upa , Upa ; no ] ^
6767 ;; Unpredicated predicate EOR and ORR.
6768 (define_expand "<optab><mode>3"
6769 [(set (match_operand:PRED_ALL 0 "register_operand")
6771 (LOGICAL_OR:PRED_ALL
6772 (match_operand:PRED_ALL 1 "register_operand")
6773 (match_operand:PRED_ALL 2 "register_operand"))
6777 operands[3] = aarch64_ptrue_reg (<MODE>mode);
6781 ;; Predicated predicate AND, EOR and ORR.
6782 (define_insn "@aarch64_pred_<optab><mode>_z"
6783 [(set (match_operand:PRED_ALL 0 "register_operand")
6786 (match_operand:PRED_ALL 2 "register_operand")
6787 (match_operand:PRED_ALL 3 "register_operand"))
6788 (match_operand:PRED_ALL 1 "register_operand")))]
6790 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6791 [ &Upa , Upa , Upa , Upa ; yes ] <logical>\t%0.b, %1/z, %2.b, %3.b
6792 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6793 [ Upa , Upa , Upa , Upa ; no ] ^
6797 ;; Perform a logical operation on operands 2 and 3, using operand 1 as
6798 ;; the GP. Store the result in operand 0 and set the flags in the same
6799 ;; way as for PTEST.
6800 (define_insn "*<optab><mode>3_cc"
6801 [(set (reg:CC_NZC CC_REGNUM)
6803 [(match_operand:VNx16BI 1 "register_operand")
6805 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6808 (match_operand:PRED_ALL 2 "register_operand")
6809 (match_operand:PRED_ALL 3 "register_operand"))
6812 (set (match_operand:PRED_ALL 0 "register_operand")
6813 (and:PRED_ALL (LOGICAL:PRED_ALL (match_dup 2) (match_dup 3))
6816 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6817 [ &Upa , Upa , Upa , Upa ; yes ] <logical>s\t%0.b, %1/z, %2.b, %3.b
6818 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6819 [ Upa , Upa , Upa , Upa ; no ] ^
6823 ;; Same with just the flags result.
6824 (define_insn "*<optab><mode>3_ptest"
6825 [(set (reg:CC_NZC CC_REGNUM)
6827 [(match_operand:VNx16BI 1 "register_operand")
6829 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6832 (match_operand:PRED_ALL 2 "register_operand")
6833 (match_operand:PRED_ALL 3 "register_operand"))
6836 (clobber (match_scratch:VNx16BI 0))]
6838 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6839 [ &Upa , Upa , Upa , Upa ; yes ] <logical>s\t%0.b, %1/z, %2.b, %3.b
6840 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6841 [ Upa , Upa , Upa , Upa ; no ] ^
6845 ;; -------------------------------------------------------------------------
6846 ;; ---- [PRED] Binary logical operations (inverted second input)
;; -------------------------------------------------------------------------
;; Includes:
;; - BIC
;; - ORN
;; -------------------------------------------------------------------------
6853 ;; Predicated predicate BIC and ORN.
6854 (define_insn "aarch64_pred_<nlogical><mode>_z"
6855 [(set (match_operand:PRED_ALL 0 "register_operand")
6858 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand"))
6859 (match_operand:PRED_ALL 2 "register_operand"))
6860 (match_operand:PRED_ALL 1 "register_operand")))]
6862 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6863 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>\t%0.b, %1/z, %2.b, %3.b
6864 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6865 [ Upa , Upa , Upa , Upa ; no ] ^
6869 ;; Same, but set the flags as a side-effect.
6870 (define_insn "*<nlogical><mode>3_cc"
6871 [(set (reg:CC_NZC CC_REGNUM)
6873 [(match_operand:VNx16BI 1 "register_operand")
6875 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6879 (match_operand:PRED_ALL 3 "register_operand"))
6880 (match_operand:PRED_ALL 2 "register_operand"))
6883 (set (match_operand:PRED_ALL 0 "register_operand")
6884 (and:PRED_ALL (NLOGICAL:PRED_ALL
6885 (not:PRED_ALL (match_dup 3))
6889 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6890 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>s\t%0.b, %1/z, %2.b, %3.b
6891 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6892 [ Upa , Upa , Upa , Upa ; no ] ^
6896 ;; Same with just the flags result.
6897 (define_insn "*<nlogical><mode>3_ptest"
6898 [(set (reg:CC_NZC CC_REGNUM)
6900 [(match_operand:VNx16BI 1 "register_operand")
6902 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6906 (match_operand:PRED_ALL 3 "register_operand"))
6907 (match_operand:PRED_ALL 2 "register_operand"))
6910 (clobber (match_scratch:VNx16BI 0))]
6912 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6913 [ &Upa , Upa , Upa , Upa ; yes ] <nlogical>s\t%0.b, %1/z, %2.b, %3.b
6914 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6915 [ Upa , Upa , Upa , Upa ; no ] ^
6919 ;; -------------------------------------------------------------------------
6920 ;; ---- [PRED] Binary logical operations (inverted result)
;; -------------------------------------------------------------------------
;; Includes:
;; - NAND
;; - NOR
;; -------------------------------------------------------------------------
6927 ;; Predicated predicate NAND and NOR.
6928 (define_insn "aarch64_pred_<logical_nn><mode>_z"
6929 [(set (match_operand:PRED_ALL 0 "register_operand")
6932 (not:PRED_ALL (match_operand:PRED_ALL 2 "register_operand"))
6933 (not:PRED_ALL (match_operand:PRED_ALL 3 "register_operand")))
6934 (match_operand:PRED_ALL 1 "register_operand")))]
6936 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6937 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>\t%0.b, %1/z, %2.b, %3.b
6938 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6939 [ Upa , Upa , Upa , Upa ; no ] ^
6943 ;; Same, but set the flags as a side-effect.
6944 (define_insn "*<logical_nn><mode>3_cc"
6945 [(set (reg:CC_NZC CC_REGNUM)
6947 [(match_operand:VNx16BI 1 "register_operand")
6949 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6953 (match_operand:PRED_ALL 2 "register_operand"))
6955 (match_operand:PRED_ALL 3 "register_operand")))
6958 (set (match_operand:PRED_ALL 0 "register_operand")
6959 (and:PRED_ALL (NLOGICAL:PRED_ALL
6960 (not:PRED_ALL (match_dup 2))
6961 (not:PRED_ALL (match_dup 3)))
6964 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6965 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>s\t%0.b, %1/z, %2.b, %3.b
6966 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6967 [ Upa , Upa , Upa , Upa ; no ] ^
6971 ;; Same with just the flags result.
6972 (define_insn "*<logical_nn><mode>3_ptest"
6973 [(set (reg:CC_NZC CC_REGNUM)
6975 [(match_operand:VNx16BI 1 "register_operand")
6977 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
6981 (match_operand:PRED_ALL 2 "register_operand"))
6983 (match_operand:PRED_ALL 3 "register_operand")))
6986 (clobber (match_scratch:VNx16BI 0))]
6988 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
6989 [ &Upa , Upa , Upa , Upa ; yes ] <logical_nn>s\t%0.b, %1/z, %2.b, %3.b
6990 [ ?Upa , 0Upa, 0Upa, 0Upa; yes ] ^
6991 [ Upa , Upa , Upa , Upa ; no ] ^
6995 ;; =========================================================================
6996 ;; == Ternary arithmetic
6997 ;; =========================================================================
6999 ;; -------------------------------------------------------------------------
7000 ;; ---- [INT] MLA and MAD
;; -------------------------------------------------------------------------
;; Includes:
;; - MAD
;; - MLA
;; -------------------------------------------------------------------------
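
;; MLA and MAD compute the same thing (operand 2 * operand 3 + operand 4);
;; MLA ties the accumulator to the destination, while MAD ties one of the
;; multiplication inputs, so the choice depends on which input the register
;; allocator can reuse.  Roughly the vectorized form of (a C-level sketch):
;;
;;   for (int i = 0; i < n; i++)
;;     r[i] = a[i] * b[i] + c[i];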
7007 ;; Unpredicated integer addition of product.
7008 (define_expand "fma<mode>4"
7009 [(set (match_operand:SVE_I 0 "register_operand")
7014 (match_operand:SVE_I 1 "register_operand")
7015 (match_operand:SVE_I 2 "nonmemory_operand"))]
7017 (match_operand:SVE_I 3 "register_operand")))]
7020 if (aarch64_prepare_sve_int_fma (operands, PLUS))
7022 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7026 ;; Predicated integer addition of product.
7027 (define_insn "@aarch64_pred_fma<mode>"
7028 [(set (match_operand:SVE_I 0 "register_operand")
7031 [(match_operand:<VPRED> 1 "register_operand")
7033 (match_operand:SVE_I 2 "register_operand")
7034 (match_operand:SVE_I 3 "register_operand"))]
7036 (match_operand:SVE_I 4 "register_operand")))]
7038 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7039 [ w , Upl , %0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7040 [ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7041 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7045 ;; Predicated integer addition of product with merging.
7046 (define_expand "cond_fma<mode>"
7047 [(set (match_operand:SVE_I 0 "register_operand")
7049 [(match_operand:<VPRED> 1 "register_operand")
7052 (match_operand:SVE_I 2 "register_operand")
7053 (match_operand:SVE_I 3 "general_operand"))
7054 (match_operand:SVE_I 4 "register_operand"))
7055 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7059 if (aarch64_prepare_sve_cond_int_fma (operands, PLUS))
7061 /* Swap the multiplication operands if the fallback value is the
7062 second of the two. */
7063 if (rtx_equal_p (operands[3], operands[5]))
7064 std::swap (operands[2], operands[3]);
7068 ;; Predicated integer addition of product, merging with the first input.
7069 (define_insn "*cond_fma<mode>_2"
7070 [(set (match_operand:SVE_I 0 "register_operand")
7072 [(match_operand:<VPRED> 1 "register_operand")
7075 (match_operand:SVE_I 2 "register_operand")
7076 (match_operand:SVE_I 3 "register_operand"))
7077 (match_operand:SVE_I 4 "register_operand"))
7081 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7082 [ w , Upl , 0 , w , w ; * ] mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7083 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7087 ;; Predicated integer addition of product, merging with the third input.
7088 (define_insn "*cond_fma<mode>_4"
7089 [(set (match_operand:SVE_I 0 "register_operand")
7091 [(match_operand:<VPRED> 1 "register_operand")
7094 (match_operand:SVE_I 2 "register_operand")
7095 (match_operand:SVE_I 3 "register_operand"))
7096 (match_operand:SVE_I 4 "register_operand"))
7100 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7101 [ w , Upl , w , w , 0 ; * ] mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7102 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7106 ;; Predicated integer addition of product, merging with an independent value.
7107 (define_insn_and_rewrite "*cond_fma<mode>_any"
7108 [(set (match_operand:SVE_I 0 "register_operand")
7110 [(match_operand:<VPRED> 1 "register_operand")
7113 (match_operand:SVE_I 2 "register_operand")
7114 (match_operand:SVE_I 3 "register_operand"))
7115 (match_operand:SVE_I 4 "register_operand"))
7116 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7119 && !rtx_equal_p (operands[2], operands[5])
7120 && !rtx_equal_p (operands[3], operands[5])
7121 && !rtx_equal_p (operands[4], operands[5])"
7122 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7123 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7124 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7125 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7126 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mad\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7127 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7128 [ ?&w , Upl , w , w , w , w ] #
7130 "&& reload_completed
7131 && register_operand (operands[5], <MODE>mode)
7132 && !rtx_equal_p (operands[0], operands[5])"
7134 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7135 operands[5], operands[1]));
7136 operands[5] = operands[4] = operands[0];
7138 [(set_attr "movprfx" "yes")]
7141 ;; -------------------------------------------------------------------------
7142 ;; ---- [INT] MLS and MSB
;; -------------------------------------------------------------------------
;; Includes:
;; - MLS
;; - MSB
;; -------------------------------------------------------------------------
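
;; MLS and MSB compute operand 4 - operand 2 * operand 3; as with MLA
;; and MAD, MLS ties the accumulator to the destination while MSB ties
;; one of the multiplication inputs.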
7149 ;; Unpredicated integer subtraction of product.
7150 (define_expand "fnma<mode>4"
7151 [(set (match_operand:SVE_I 0 "register_operand")
7153 (match_operand:SVE_I 3 "register_operand")
7157 (match_operand:SVE_I 1 "register_operand")
7158 (match_operand:SVE_I 2 "general_operand"))]
7162 if (aarch64_prepare_sve_int_fma (operands, MINUS))
7164 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7168 ;; Predicated integer subtraction of product.
7169 (define_insn "@aarch64_pred_fnma<mode>"
7170 [(set (match_operand:SVE_I 0 "register_operand")
7172 (match_operand:SVE_I 4 "register_operand")
7174 [(match_operand:<VPRED> 1 "register_operand")
7176 (match_operand:SVE_I 2 "register_operand")
7177 (match_operand:SVE_I 3 "register_operand"))]
7180 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7181 [ w , Upl , %0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7182 [ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7183 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7187 ;; Predicated integer subtraction of product with merging.
7188 (define_expand "cond_fnma<mode>"
7189 [(set (match_operand:SVE_I 0 "register_operand")
7191 [(match_operand:<VPRED> 1 "register_operand")
7193 (match_operand:SVE_I 4 "register_operand")
7195 (match_operand:SVE_I 2 "register_operand")
7196 (match_operand:SVE_I 3 "general_operand")))
7197 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7201 if (aarch64_prepare_sve_cond_int_fma (operands, MINUS))
7203 /* Swap the multiplication operands if the fallback value is the
7204 second of the two. */
7205 if (rtx_equal_p (operands[3], operands[5]))
7206 std::swap (operands[2], operands[3]);
7210 ;; Predicated integer subtraction of product, merging with the first input.
7211 (define_insn "*cond_fnma<mode>_2"
7212 [(set (match_operand:SVE_I 0 "register_operand")
7214 [(match_operand:<VPRED> 1 "register_operand")
7216 (match_operand:SVE_I 4 "register_operand")
7218 (match_operand:SVE_I 2 "register_operand")
7219 (match_operand:SVE_I 3 "register_operand")))
7223 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7224 [ w , Upl , 0 , w , w ; * ] msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7225 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7229 ;; Predicated integer subtraction of product, merging with the third input.
7230 (define_insn "*cond_fnma<mode>_4"
7231 [(set (match_operand:SVE_I 0 "register_operand")
7233 [(match_operand:<VPRED> 1 "register_operand")
7235 (match_operand:SVE_I 4 "register_operand")
7237 (match_operand:SVE_I 2 "register_operand")
7238 (match_operand:SVE_I 3 "register_operand")))
7242 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7243 [ w , Upl , w , w , 0 ; * ] mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7244 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7248 ;; Predicated integer subtraction of product, merging with an
7249 ;; independent value.
7250 (define_insn_and_rewrite "*cond_fnma<mode>_any"
7251 [(set (match_operand:SVE_I 0 "register_operand")
7253 [(match_operand:<VPRED> 1 "register_operand")
7255 (match_operand:SVE_I 4 "register_operand")
7257 (match_operand:SVE_I 2 "register_operand")
7258 (match_operand:SVE_I 3 "register_operand")))
7259 (match_operand:SVE_I 5 "aarch64_simd_reg_or_zero")]
7262 && !rtx_equal_p (operands[2], operands[5])
7263 && !rtx_equal_p (operands[3], operands[5])
7264 && !rtx_equal_p (operands[4], operands[5])"
7265 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7266 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7267 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7268 [ &w , Upl , 0 , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7269 [ &w , Upl , w , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;msb\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7270 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;mls\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7271 [ ?&w , Upl , w , w , w , w ] #
7273 "&& reload_completed
7274 && register_operand (operands[5], <MODE>mode)
7275 && !rtx_equal_p (operands[0], operands[5])"
7277 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7278 operands[5], operands[1]));
7279 operands[5] = operands[4] = operands[0];
7281 [(set_attr "movprfx" "yes")]
7284 ;; -------------------------------------------------------------------------
7285 ;; ---- [INT] Dot product
;; -------------------------------------------------------------------------
;; Includes:
;; - SDOT
;; - SUDOT (I8MM)
;; - UDOT
;; - USDOT (I8MM)
;; -------------------------------------------------------------------------
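
;; A four-element dot product accumulates each group of four narrow
;; products into one wide accumulator element; roughly the vectorized
;; form of (a C-level sketch for SDOT on bytes):
;;
;;   for (int i = 0; i < n; i++)
;;     acc[i / 4] += (int32_t) a[i] * (int32_t) b[i];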
7294 ;; Four-element integer dot-product with accumulation.
7295 (define_insn "<sur>dot_prod<mode><vsi2qi>"
7296 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
7298 (unspec:SVE_FULL_SDI
7299 [(match_operand:<VSI2QI> 1 "register_operand")
7300 (match_operand:<VSI2QI> 2 "register_operand")]
7302 (match_operand:SVE_FULL_SDI 3 "register_operand")))]
7304 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7305 [ w , w , w , 0 ; * ] <sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
7306 [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.<Vetype>, %1.<Vetype_fourth>, %2.<Vetype_fourth>
7310 ;; Four-element integer dot-product by selected lanes with accumulation.
7311 (define_insn "@aarch64_<sur>dot_prod_lane<SVE_FULL_SDI:mode><SVE_FULL_BHI:mode>"
7312 [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
7314 (unspec:SVE_FULL_SDI
7315 [(match_operand:SVE_FULL_BHI 1 "register_operand")
7316 (unspec:SVE_FULL_BHI
7317 [(match_operand:SVE_FULL_BHI 2 "register_operand")
7318 (match_operand:SI 3 "const_int_operand")]
7319 UNSPEC_SVE_LANE_SELECT)]
7321 (match_operand:SVE_FULL_SDI 4 "register_operand")))]
7323 && (<SVE_FULL_SDI:elem_bits> == <SVE_FULL_BHI:elem_bits> * 4
7324 || (TARGET_SVE2p1_OR_SME2
7325 && <SVE_FULL_SDI:elem_bits> == 32
7326 && <SVE_FULL_BHI:elem_bits> == 16))"
7327 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7328 [ w , w , <SVE_FULL_SDI:sve_lane_con> , 0 ; * ] <sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
7329 [ ?&w , w , <SVE_FULL_SDI:sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.<SVE_FULL_SDI:Vetype>, %1.<SVE_FULL_BHI:Vetype>, %2.<SVE_FULL_BHI:Vetype>[%3]
7333 (define_insn "@<sur>dot_prod<mode><vsi2qi>"
7334 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7337 [(match_operand:<VSI2QI> 1 "register_operand")
7338 (match_operand:<VSI2QI> 2 "register_operand")]
7340 (match_operand:VNx4SI_ONLY 3 "register_operand")))]
7342 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7343 [ w , w , w , 0 ; * ] <sur>dot\t%0.s, %1.b, %2.b
7344 [ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.b, %2.b
7348 (define_insn "@aarch64_<sur>dot_prod_lane<VNx4SI_ONLY:mode><VNx16QI_ONLY:mode>"
7349 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7352 [(match_operand:VNx16QI_ONLY 1 "register_operand")
7353 (unspec:VNx16QI_ONLY
7354 [(match_operand:VNx16QI_ONLY 2 "register_operand")
7355 (match_operand:SI 3 "const_int_operand")]
7356 UNSPEC_SVE_LANE_SELECT)]
7358 (match_operand:VNx4SI_ONLY 4 "register_operand")))]
7360 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7361 [ w , w , y , 0 ; * ] <sur>dot\t%0.s, %1.b, %2.b[%3]
7362 [ ?&w , w , y , w ; yes ] movprfx\t%0, %4\;<sur>dot\t%0.s, %1.b, %2.b[%3]
7366 ;; -------------------------------------------------------------------------
7367 ;; ---- [INT] Sum of absolute differences
7368 ;; -------------------------------------------------------------------------
7369 ;; The patterns in this section are synthetic.
7370 ;; -------------------------------------------------------------------------
7372 ;; Emit a sequence to produce a sum-of-absolute-differences of the inputs in
7373 ;; operands 1 and 2. The sequence also has to perform a widening reduction of
7374 ;; the difference into a vector and accumulate that into operand 3 before
7375 ;; copying that into the result operand 0.
7376 ;; Perform that with a sequence of:
7378 ;; [SU]ABD diff.b, p0/m, op1.b, op2.b
7379 ;; MOVPRFX op0, op3 // If necessary
7380 ;; UDOT op0.s, diff.b, ones.b
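;;
;; i.e. roughly the vectorized form of:
;;
;;   for (int i = 0; i < n; i++)
;;     sum += abs (op1[i] - op2[i]);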
7381 (define_expand "<su>sad<vsi2qi>"
7382 [(use (match_operand:SVE_FULL_SDI 0 "register_operand"))
7383 (USMAX:<VSI2QI> (match_operand:<VSI2QI> 1 "register_operand")
7384 (match_operand:<VSI2QI> 2 "register_operand"))
7385 (use (match_operand:SVE_FULL_SDI 3 "register_operand"))]
7388 rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
7389 rtx diff = gen_reg_rtx (<VSI2QI>mode);
7390 emit_insn (gen_<su>abd<vsi2qi>3 (diff, operands[1], operands[2]));
7391 emit_insn (gen_udot_prod<mode><vsi2qi> (operands[0], diff, ones,
7397 ;; -------------------------------------------------------------------------
7398 ;; ---- [INT] Matrix multiply-accumulate
;; -------------------------------------------------------------------------
;; Includes:
;; - SMMLA (I8MM)
;; - UMMLA (I8MM)
;; - USMMLA (I8MM)
;; -------------------------------------------------------------------------
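
;; MMLA multiplies a 2x8 block of 8-bit elements by an 8x2 block and
;; accumulates the resulting 2x2 block of 32-bit sums into each 128-bit
;; segment of the accumulator (a sketch of the operation, not of the
;; pattern below).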
7406 (define_insn "@aarch64_sve_add_<optab><vsi2qi>"
7407 [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
7410 [(match_operand:<VSI2QI> 2 "register_operand")
7411 (match_operand:<VSI2QI> 3 "register_operand")]
7413 (match_operand:VNx4SI_ONLY 1 "register_operand")))]
7414 "TARGET_SVE_I8MM && TARGET_NON_STREAMING"
7415 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7416 [ w , 0 , w , w ; * ] <sur>mmla\t%0.s, %2.b, %3.b
7417 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sur>mmla\t%0.s, %2.b, %3.b
7421 ;; -------------------------------------------------------------------------
7422 ;; ---- [FP] General ternary arithmetic corresponding to unspecs
7423 ;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;; - FMAD
;; - FMLA
;; - FMLS
;; - FMSB
;; - FNMAD
;; - FNMLA
;; - FNMLS
;; - FNMSB
;; -------------------------------------------------------------------------
7435 ;; Unpredicated floating-point ternary operations.
7436 (define_expand "<optab><mode>4"
7437 [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7438 (unspec:SVE_FULL_F_BF
7440 (const_int SVE_RELAXED_GP)
7441 (match_operand:SVE_FULL_F_BF 1 "register_operand")
7442 (match_operand:SVE_FULL_F_BF 2 "register_operand")
7443 (match_operand:SVE_FULL_F_BF 3 "register_operand")]
7444 SVE_COND_FP_TERNARY))]
7445 "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7447 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7451 ;; Predicated floating-point ternary operations.
7452 (define_insn "@aarch64_pred_<optab><mode>"
7453 [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7454 (unspec:SVE_FULL_F_BF
7455 [(match_operand:<VPRED> 1 "register_operand")
7456 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7457 (match_operand:SVE_FULL_F_BF 2 "register_operand")
7458 (match_operand:SVE_FULL_F_BF 3 "register_operand")
7459 (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7460 SVE_COND_FP_TERNARY))]
7461 "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7462 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx , is_rev ]
7463 [ w , Upl , %w , w , 0 ; * , * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7464 [ w , Upl , 0 , w , w ; * , true ] <b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7465 [ ?&w , Upl , w , w , w ; yes , * ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7467 [(set_attr "is_bf16" "<is_bf16>")
7468 (set_attr "supports_bf16_rev" "false")]
7471 ;; Predicated floating-point ternary operations with merging.
7472 (define_expand "@cond_<optab><mode>"
7473 [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7474 (unspec:SVE_FULL_F_BF
7475 [(match_operand:<VPRED> 1 "register_operand")
7476 (unspec:SVE_FULL_F_BF
7478 (const_int SVE_STRICT_GP)
7479 (match_operand:SVE_FULL_F_BF 2 "register_operand")
7480 (match_operand:SVE_FULL_F_BF 3 "register_operand")
7481 (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7482 SVE_COND_FP_TERNARY)
7483 (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
7485 "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7487 /* Swap the multiplication operands if the fallback value is the
7488 second of the two. */
7489 if (rtx_equal_p (operands[3], operands[5]))
7490 std::swap (operands[2], operands[3]);
;; Predicated floating-point ternary operations, merging with the
;; first input.
7495 (define_insn_and_rewrite "*cond_<optab><mode>_2_relaxed"
7496 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7498 [(match_operand:<VPRED> 1 "register_operand")
7501 (const_int SVE_RELAXED_GP)
7502 (match_operand:SVE_FULL_F 2 "register_operand")
7503 (match_operand:SVE_FULL_F 3 "register_operand")
7504 (match_operand:SVE_FULL_F 4 "register_operand")]
7505 SVE_COND_FP_TERNARY)
7509 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7510 [ w , Upl , 0 , w , w ; * ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7511 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7513 "&& !rtx_equal_p (operands[1], operands[5])"
7515 operands[5] = copy_rtx (operands[1]);
7519 (define_insn "*cond_<optab><mode>_2_strict"
7520 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7522 [(match_operand:<VPRED> 1 "register_operand")
7525 (const_int SVE_STRICT_GP)
7526 (match_operand:SVE_FULL_F 2 "register_operand")
7527 (match_operand:SVE_FULL_F 3 "register_operand")
7528 (match_operand:SVE_FULL_F 4 "register_operand")]
7529 SVE_COND_FP_TERNARY)
7533 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7534 [ w , Upl , 0 , w , w ; * ] <sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7535 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %2\;<sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
;; Predicated floating-point ternary operations, merging with the
;; third input.
7541 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7542 [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7543 (unspec:SVE_FULL_F_BF
7544 [(match_operand:<VPRED> 1 "register_operand")
7545 (unspec:SVE_FULL_F_BF
7547 (const_int SVE_RELAXED_GP)
7548 (match_operand:SVE_FULL_F_BF 2 "register_operand")
7549 (match_operand:SVE_FULL_F_BF 3 "register_operand")
7550 (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7551 SVE_COND_FP_TERNARY)
7554 "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7555 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7556 [ w , Upl , w , w , 0 ; * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7557 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7559 "&& !rtx_equal_p (operands[1], operands[5])"
7561 operands[5] = copy_rtx (operands[1]);
7565 (define_insn "*cond_<optab><mode>_4_strict"
7566 [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7567 (unspec:SVE_FULL_F_BF
7568 [(match_operand:<VPRED> 1 "register_operand")
7569 (unspec:SVE_FULL_F_BF
7571 (const_int SVE_STRICT_GP)
7572 (match_operand:SVE_FULL_F_BF 2 "register_operand")
7573 (match_operand:SVE_FULL_F_BF 3 "register_operand")
7574 (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7575 SVE_COND_FP_TERNARY)
7578 "TARGET_SVE && (<supports_bf16> || !<is_bf16>)"
7579 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7580 [ w , Upl , w , w , 0 ; * ] <b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7581 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7585 ;; Predicated floating-point ternary operations, merging with an
7586 ;; independent value.
7587 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7588 [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7589 (unspec:SVE_FULL_F_BF
7590 [(match_operand:<VPRED> 1 "register_operand")
7591 (unspec:SVE_FULL_F_BF
7593 (const_int SVE_RELAXED_GP)
7594 (match_operand:SVE_FULL_F_BF 2 "register_operand")
7595 (match_operand:SVE_FULL_F_BF 3 "register_operand")
7596 (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7597 SVE_COND_FP_TERNARY)
7598 (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
7601 && (<supports_bf16> || !<is_bf16>)
7602 && (<is_bf16> || !rtx_equal_p (operands[2], operands[5]))
7603 && (<is_bf16> || !rtx_equal_p (operands[3], operands[5]))
7604 && !rtx_equal_p (operands[4], operands[5])"
7605 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ; attrs: is_rev ]
7606 [ &w , Upl , w , w , w , Dz ; * ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7607 [ &w , Upl , w , w , 0 , Dz ; * ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7608 [ &w , Upl , 0 , w , w , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7609 [ &w , Upl , w , 0 , w , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7610 [ &w , Upl , w , w , w , 0 ; * ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7611 [ ?&w , Upl , w , w , w , w ; * ] #
7615 if (reload_completed
7616 && register_operand (operands[5], <MODE>mode)
7617 && !rtx_equal_p (operands[0], operands[5]))
7619 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7620 operands[5], operands[1]));
7621 operands[5] = operands[4] = operands[0];
7623 else if (!rtx_equal_p (operands[1], operands[6]))
7624 operands[6] = copy_rtx (operands[1]);
7628 [(set_attr "movprfx" "yes")
7629 (set_attr "is_bf16" "<is_bf16>")
7630 (set_attr "supports_bf16_rev" "false")]
7633 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7634 [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7635 (unspec:SVE_FULL_F_BF
7636 [(match_operand:<VPRED> 1 "register_operand")
7637 (unspec:SVE_FULL_F_BF
7639 (const_int SVE_STRICT_GP)
7640 (match_operand:SVE_FULL_F_BF 2 "register_operand")
7641 (match_operand:SVE_FULL_F_BF 3 "register_operand")
7642 (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7643 SVE_COND_FP_TERNARY)
7644 (match_operand:SVE_FULL_F_BF 5 "aarch64_simd_reg_or_zero")]
7647 && (<supports_bf16> || !<is_bf16>)
7648 && (<is_bf16> || !rtx_equal_p (operands[2], operands[5]))
7649 && (<is_bf16> || !rtx_equal_p (operands[3], operands[5]))
7650 && !rtx_equal_p (operands[4], operands[5])"
7651 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ; attrs: is_rev ]
7652 [ &w , Upl , w , w , w , Dz ; * ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7653 [ &w , Upl , w , w , 0 , Dz ; * ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7654 [ &w , Upl , 0 , w , w , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmad_op>\t%0.<Vetype>, %1/m, %3.<Vetype>, %4.<Vetype>
7655 [ &w , Upl , w , 0 , w , Dz ; true ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<b><sve_fmad_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %4.<Vetype>
7656 [ &w , Upl , w , w , w , 0 ; * ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;<b><sve_fmla_op>\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>
7657 [ ?&w , Upl , w , w , w , w ; * ] #
7659 "&& reload_completed
7660 && register_operand (operands[5], <MODE>mode)
7661 && !rtx_equal_p (operands[0], operands[5])"
7663 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7664 operands[5], operands[1]));
7665 operands[5] = operands[4] = operands[0];
7667 [(set_attr "movprfx" "yes")
7668 (set_attr "is_bf16" "<is_bf16>")
7669 (set_attr "supports_bf16_rev" "false")]
7672 ;; Unpredicated FMLA and FMLS by selected lanes. It doesn't seem worth using
7673 ;; (fma ...) since target-independent code won't understand the indexing.
7674 (define_insn "@aarch64_<optab>_lane_<mode>"
7675 [(set (match_operand:SVE_FULL_F_BF 0 "register_operand")
7676 (unspec:SVE_FULL_F_BF
7677 [(match_operand:SVE_FULL_F_BF 1 "register_operand")
7678 (unspec:SVE_FULL_F_BF
7679 [(match_operand:SVE_FULL_F_BF 2 "register_operand")
7680 (match_operand:SI 3 "const_int_operand")]
7681 UNSPEC_SVE_LANE_SELECT)
7682 (match_operand:SVE_FULL_F_BF 4 "register_operand")]
7683 SVE_FP_TERNARY_LANE))]
7685 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7686 [ w , w , <sve_lane_con> , 0 ; * ] <b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7687 [ ?&w , w , <sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<b><sve_fp_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]
7691 ;; -------------------------------------------------------------------------
7692 ;; ---- [FP] Complex multiply-add
7693 ;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;; - FCMLA
;; -------------------------------------------------------------------------
7698 ;; Predicated FCMLA.
7699 (define_insn "@aarch64_pred_<optab><mode>"
7700 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7702 [(match_operand:<VPRED> 1 "register_operand")
7703 (match_operand:SI 5 "aarch64_sve_gp_strictness")
7704 (match_operand:SVE_FULL_F 2 "register_operand")
7705 (match_operand:SVE_FULL_F 3 "register_operand")
7706 (match_operand:SVE_FULL_F 4 "register_operand")]
7709 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7710 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7711 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
;; Unpredicated optab pattern for the auto-vectorizer.
7716 ;; The complex mla/mls operations always need to expand to two instructions.
7717 ;; The first operation does half the computation and the second does the
7718 ;; remainder. Because of this, expand early.
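;;
;; For example, ignoring the conjugate variants, a complex multiply-add
;; becomes a pair of FCMLAs with rotations #0 and #90 (a sketch; the
;; exact rotations depend on <conj_op>):
;;
;;   fcmla	res, pg/m, a, b, #0
;;   fcmla	res, pg/m, a, b, #90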
7719 (define_expand "cml<fcmac1><conj_op><mode>4"
7720 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7724 (match_operand:SVE_FULL_F 1 "register_operand")
7725 (match_operand:SVE_FULL_F 2 "register_operand")
7726 (match_operand:SVE_FULL_F 3 "register_operand")]
7730 operands[4] = aarch64_ptrue_reg (<VPRED>mode);
7731 operands[5] = gen_int_mode (SVE_RELAXED_GP, SImode);
7732 rtx tmp = gen_reg_rtx (<MODE>mode);
7734 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, operands[4],
7735 operands[2], operands[1],
7736 operands[3], operands[5]));
7738 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], operands[4],
7739 operands[2], operands[1],
;; Unpredicated optab pattern for the auto-vectorizer.
7745 ;; The complex mul operations always need to expand to two instructions.
7746 ;; The first operation does half the computation and the second does the
7747 ;; remainder. Because of this, expand early.
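;; The sequence is the same as for the cml<fcmac1> expander above, but
;; starting from a zeroed accumulator, so that the pair of FCMLAs
;; computes a plain complex multiply.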
7748 (define_expand "cmul<conj_op><mode>3"
7749 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7751 [(match_operand:SVE_FULL_F 1 "register_operand")
7752 (match_operand:SVE_FULL_F 2 "register_operand")]
7756 rtx pred_reg = aarch64_ptrue_reg (<VPRED>mode);
7757 rtx gp_mode = gen_int_mode (SVE_RELAXED_GP, SImode);
7758 rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
7759 rtx tmp = gen_reg_rtx (<MODE>mode);
7761 (gen_aarch64_pred_fcmla<sve_rot1><mode> (tmp, pred_reg,
7762 operands[2], operands[1],
7765 (gen_aarch64_pred_fcmla<sve_rot2><mode> (operands[0], pred_reg,
7766 operands[2], operands[1],
7771 ;; Predicated FCMLA with merging.
7772 (define_expand "@cond_<optab><mode>"
7773 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7775 [(match_operand:<VPRED> 1 "register_operand")
7778 (const_int SVE_STRICT_GP)
7779 (match_operand:SVE_FULL_F 2 "register_operand")
7780 (match_operand:SVE_FULL_F 3 "register_operand")
7781 (match_operand:SVE_FULL_F 4 "register_operand")]
7783 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7788 ;; Predicated FCMLA, merging with the third input.
7789 (define_insn_and_rewrite "*cond_<optab><mode>_4_relaxed"
7790 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7792 [(match_operand:<VPRED> 1 "register_operand")
7795 (const_int SVE_RELAXED_GP)
7796 (match_operand:SVE_FULL_F 2 "register_operand")
7797 (match_operand:SVE_FULL_F 3 "register_operand")
7798 (match_operand:SVE_FULL_F 4 "register_operand")]
7803 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7804 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7805 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7807 "&& !rtx_equal_p (operands[1], operands[5])"
7809 operands[5] = copy_rtx (operands[1]);
7813 (define_insn "*cond_<optab><mode>_4_strict"
7814 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7816 [(match_operand:<VPRED> 1 "register_operand")
7819 (const_int SVE_STRICT_GP)
7820 (match_operand:SVE_FULL_F 2 "register_operand")
7821 (match_operand:SVE_FULL_F 3 "register_operand")
7822 (match_operand:SVE_FULL_F 4 "register_operand")]
7827 {@ [ cons: =0 , 1 , 2 , 3 , 4 ; attrs: movprfx ]
7828 [ w , Upl , w , w , 0 ; * ] fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7829 [ ?&w , Upl , w , w , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7833 ;; Predicated FCMLA, merging with an independent value.
7834 (define_insn_and_rewrite "*cond_<optab><mode>_any_relaxed"
7835 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7837 [(match_operand:<VPRED> 1 "register_operand")
7840 (const_int SVE_RELAXED_GP)
7841 (match_operand:SVE_FULL_F 2 "register_operand")
7842 (match_operand:SVE_FULL_F 3 "register_operand")
7843 (match_operand:SVE_FULL_F 4 "register_operand")]
7845 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7847 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7848 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7849 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7850 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7851 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7852 [ ?&w , Upl , w , w , w , w ] #
7856 if (reload_completed
7857 && register_operand (operands[5], <MODE>mode)
7858 && !rtx_equal_p (operands[0], operands[5]))
7860 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7861 operands[5], operands[1]));
7862 operands[5] = operands[4] = operands[0];
7864 else if (!rtx_equal_p (operands[1], operands[6]))
7865 operands[6] = copy_rtx (operands[1]);
7869 [(set_attr "movprfx" "yes")]
7872 (define_insn_and_rewrite "*cond_<optab><mode>_any_strict"
7873 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7875 [(match_operand:<VPRED> 1 "register_operand")
7878 (const_int SVE_STRICT_GP)
7879 (match_operand:SVE_FULL_F 2 "register_operand")
7880 (match_operand:SVE_FULL_F 3 "register_operand")
7881 (match_operand:SVE_FULL_F 4 "register_operand")]
7883 (match_operand:SVE_FULL_F 5 "aarch64_simd_reg_or_zero")]
7885 "TARGET_SVE && !rtx_equal_p (operands[4], operands[5])"
7886 {@ [ cons: =0 , 1 , 2 , 3 , 4 , 5 ]
7887 [ &w , Upl , w , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7888 [ &w , Upl , w , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7889 [ &w , Upl , w , w , w , 0 ] movprfx\t%0.<Vetype>, %1/m, %4.<Vetype>\;fcmla\t%0.<Vetype>, %1/m, %2.<Vetype>, %3.<Vetype>, #<rot>
7890 [ ?&w , Upl , w , w , w , w ] #
7892 "&& reload_completed
7893 && register_operand (operands[5], <MODE>mode)
7894 && !rtx_equal_p (operands[0], operands[5])"
7896 emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[4],
7897 operands[5], operands[1]));
7898 operands[5] = operands[4] = operands[0];
7900 [(set_attr "movprfx" "yes")]
7903 ;; Unpredicated FCMLA with indexing.
7904 (define_insn "@aarch64_<optab>_lane_<mode>"
7905 [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
7906 (unspec:SVE_FULL_HSF
7907 [(match_operand:SVE_FULL_HSF 1 "register_operand")
7908 (unspec:SVE_FULL_HSF
7909 [(match_operand:SVE_FULL_HSF 2 "register_operand")
7910 (match_operand:SI 3 "const_int_operand")]
7911 UNSPEC_SVE_LANE_SELECT)
7912 (match_operand:SVE_FULL_HSF 4 "register_operand")]
7915 {@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
7916 [ w , w , <sve_lane_pair_con> , 0 ; * ] fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7917 [ ?&w , w , <sve_lane_pair_con> , w ; yes ] movprfx\t%0, %4\;fcmla\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3], #<rot>
7921 ;; -------------------------------------------------------------------------
7922 ;; ---- [FP] Trigonometric multiply-add
;; -------------------------------------------------------------------------
;; Includes:
;; - FTMAD
;; -------------------------------------------------------------------------
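
;; FTMAD multiplies its inputs and adds a coefficient selected from a
;; hard-wired table by the immediate; in effect it is a building block
;; for evaluating polynomial approximations of trigonometric functions.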
7928 (define_insn "@aarch64_sve_tmad<mode>"
7929 [(set (match_operand:SVE_FULL_F 0 "register_operand")
7931 [(match_operand:SVE_FULL_F 1 "register_operand")
7932 (match_operand:SVE_FULL_F 2 "register_operand")
7933 (match_operand:DI 3 "const_int_operand")]
7936 {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
7937 [ w , 0 , w ; * ] ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7938 [ ?&w , w , w ; yes ] movprfx\t%0, %1\;ftmad\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
7942 ;; -------------------------------------------------------------------------
7943 ;; ---- [FP] Bfloat16 long ternary arithmetic (SF,BF,BF)
;; -------------------------------------------------------------------------
;; Includes:
;; - BFDOT (BF16)
;; - BFMLALB (BF16)
;; - BFMLALT (BF16)
;; - BFMLSLB (SVE2p1, SME2)
;; - BFMLSLT (SVE2p1, SME2)
;; - BFMMLA (BF16)
;; -------------------------------------------------------------------------
7954 (define_insn "@aarch64_sve_<sve_fp_op>vnx4sf"
7955 [(set (match_operand:VNx4SF 0 "register_operand")
7957 [(match_operand:VNx4SF 1 "register_operand")
7958 (match_operand:VNx8BF 2 "register_operand")
7959 (match_operand:VNx8BF 3 "register_operand")]
7960 SVE_BFLOAT_TERNARY_LONG))]
7962 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7963 [ w , 0 , w , w ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h
7964 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h
7968 ;; The immediate range is enforced before generating the instruction.
7969 (define_insn "@aarch64_sve_<sve_fp_op>_lanevnx4sf"
7970 [(set (match_operand:VNx4SF 0 "register_operand")
7972 [(match_operand:VNx4SF 1 "register_operand")
7973 (match_operand:VNx8BF 2 "register_operand")
7974 (match_operand:VNx8BF 3 "register_operand")
7975 (match_operand:SI 4 "const_int_operand")]
7976 SVE_BFLOAT_TERNARY_LONG_LANE))]
7978 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
7979 [ w , 0 , w , y ; * ] <sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7980 [ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.s, %2.h, %3.h[%4]
7984 ;; -------------------------------------------------------------------------
7985 ;; ---- [FP] Matrix multiply-accumulate
;; -------------------------------------------------------------------------
;; Includes:
;; - FMMLA (F32MM,F64MM)
7989 ;; -------------------------------------------------------------------------
7991 ;; The mode iterator enforces the target requirements.
7992 (define_insn "@aarch64_sve_<sve_fp_op><mode>"
7993 [(set (match_operand:SVE_MATMULF 0 "register_operand")
7995 [(match_operand:SVE_MATMULF 2 "register_operand")
7996 (match_operand:SVE_MATMULF 3 "register_operand")
7997 (match_operand:SVE_MATMULF 1 "register_operand")]
7999 "TARGET_SVE && TARGET_NON_STREAMING"
8000 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
8001 [ w , 0 , w , w ; * ] <sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
8002 [ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_fp_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
8006 ;; =========================================================================
8007 ;; == Comparisons and selects
8008 ;; =========================================================================
8010 ;; -------------------------------------------------------------------------
8011 ;; ---- [INT,FP] Select based on predicates
8012 ;; -------------------------------------------------------------------------
;; Includes merging patterns for:
;; - FMOV
;; - MOV
;; - SEL
;; -------------------------------------------------------------------------
8019 ;; vcond_mask operand order: true, false, mask
8020 ;; UNSPEC_SEL operand order: mask, true, false (as for VEC_COND_EXPR)
8021 ;; SEL operand order: mask, true, false
8022 (define_expand "@vcond_mask_<mode><vpred>"
8023 [(set (match_operand:SVE_ALL 0 "register_operand")
8025 [(match_operand:<VPRED> 3 "register_operand")
8026 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
8027 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
8031 if (register_operand (operands[1], <MODE>mode))
8032 operands[2] = force_reg (<MODE>mode, operands[2]);
;; Selects between:
;; - two registers
;; - a duplicated immediate and a register
8039 ;; - a duplicated immediate and zero
;; For unpacked vectors, it doesn't really matter whether SEL uses the
;; container size or the element size.  If SEL used the container size,
8043 ;; it would ignore undefined bits of the predicate but would copy the
8044 ;; upper (undefined) bits of each container along with the defined bits.
8045 ;; If SEL used the element size, it would use undefined bits of the predicate
8046 ;; to select between undefined elements in each input vector. Thus the only
8047 ;; difference is whether the undefined bits in a container always come from
8048 ;; the same input as the defined bits, or whether the choice can vary
8049 ;; independently of the defined bits.
8051 ;; For the other instructions, using the element size is more natural,
8052 ;; so we do that for SEL as well.
8053 (define_insn "*vcond_mask_<mode><vpred>"
8054 [(set (match_operand:SVE_ALL 0 "register_operand")
8056 [(match_operand:<VPRED> 3 "register_operand")
8057 (match_operand:SVE_ALL 1 "aarch64_sve_reg_or_dup_imm")
8058 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
8061 && (!register_operand (operands[1], <MODE>mode)
8062 || register_operand (operands[2], <MODE>mode))"
8063 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
8064 [ w , w , w , Upa ; * ] sel\t%0.<Vetype>, %3, %1.<Vetype>, %2.<Vetype>
8065 [ w , vss , 0 , Upa ; * ] mov\t%0.<Vetype>, %3/m, #%I1
8066 [ w , vss , Dz , Upa ; * ] mov\t%0.<Vetype>, %3/z, #%I1
8067 [ w , Ufc , 0 , Upa ; * ] fmov\t%0.<Vetype>, %3/m, #%1
8068 [ ?w , Ufc , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;fmov\t%0.<Vetype>, %3/m, #%1
8069 [ ?&w , vss , w , Upa ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, #%I1
8070 [ ?&w , Ufc , w , Upa ; yes ] movprfx\t%0, %2\;fmov\t%0.<Vetype>, %3/m, #%1
8074 ;; Optimize selects between a duplicated scalar variable and another vector,
8075 ;; the latter of which can be a zero constant or a variable. Treat duplicates
8076 ;; of GPRs as being more expensive than duplicates of FPRs, since they
8077 ;; involve a cross-file move.
8078 (define_insn "@aarch64_sel_dup<mode>"
8079 [(set (match_operand:SVE_ALL 0 "register_operand")
8081 [(match_operand:<VPRED> 3 "register_operand")
8082 (vec_duplicate:SVE_ALL
8083 (match_operand:<VEL> 1 "register_operand"))
8084 (match_operand:SVE_ALL 2 "aarch64_simd_reg_or_zero")]
8087 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
8088 [ ?w , r , 0 , Upl ; * ] mov\t%0.<Vetype>, %3/m, %<vwcore>1
8089 [ w , w , 0 , Upl ; * ] mov\t%0.<Vetype>, %3/m, %<Vetype>1
8090 [ ??w , r , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
8091 [ ?&w , w , Dz , Upl ; yes ] movprfx\t%0.<Vetype>, %3/z, %0.<Vetype>\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
8092 [ ??&w , r , w , Upl ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<vwcore>1
8093 [ ?&w , w , w , Upl ; yes ] movprfx\t%0, %2\;mov\t%0.<Vetype>, %3/m, %<Vetype>1
8097 ;; -------------------------------------------------------------------------
8098 ;; ---- [INT] Comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - CMPEQ
;; - CMPGE
;; - CMPGT
;; - CMPHI
;; - CMPHS
;; - CMPLE
;; - CMPLO
;; - CMPLS
;; - CMPLT
;; - CMPNE
;; -------------------------------------------------------------------------
8113 ;; Signed integer comparisons. Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
8116 (define_expand "vec_cmp<mode><vpred>"
8118 [(set (match_operand:<VPRED> 0 "register_operand")
8119 (match_operator:<VPRED> 1 "comparison_operator"
8120 [(match_operand:SVE_I 2 "register_operand")
8121 (match_operand:SVE_I 3 "nonmemory_operand")]))
8122 (clobber (reg:CC_NZC CC_REGNUM))])]
8125 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
8126 operands[2], operands[3]);
8131 ;; Unsigned integer comparisons. Don't enforce an immediate range here, since
;; it depends on the comparison; leave it to aarch64_expand_sve_vec_cmp_int
;; instead.
8134 (define_expand "vec_cmpu<mode><vpred>"
8136 [(set (match_operand:<VPRED> 0 "register_operand")
8137 (match_operator:<VPRED> 1 "comparison_operator"
8138 [(match_operand:SVE_I 2 "register_operand")
8139 (match_operand:SVE_I 3 "nonmemory_operand")]))
8140 (clobber (reg:CC_NZC CC_REGNUM))])]
8143 aarch64_expand_sve_vec_cmp_int (operands[0], GET_CODE (operands[1]),
8144 operands[2], operands[3]);
8149 ;; Predicated integer comparisons.
8151 ;; For unpacked vectors, only the lowpart element in each input container
8152 ;; has a defined value, and only the predicate bits associated with
8153 ;; those elements are defined. For example, when comparing two VNx2SIs:
;; - The VNx2SIs can be seen as VNx2DIs in which the low halves of each
;;   DI container store an SI element.  The upper bits of each DI container
;;   are undefined.
8159 ;; - Alternatively, the VNx2SIs can be seen as VNx4SIs in which the
8160 ;; even elements are defined and the odd elements are undefined.
8162 ;; - The associated predicate mode is VNx2BI. This means that only the
8163 ;; low bit in each predicate byte is defined (on input and on output).
8165 ;; - We use a .s comparison to compare VNx2SIs, under the control of a
8166 ;; VNx2BI governing predicate, to produce a VNx2BI result. If we view
8167 ;; the .s operation as operating on VNx4SIs then for odd lanes:
8169 ;; - the input governing predicate bit is undefined
8170 ;; - the SI elements being compared are undefined
8171 ;; - the predicate result bit is therefore undefined, but
8172 ;; - the predicate result bit is in the undefined part of a VNx2BI,
8173 ;; so its value doesn't matter anyway.
8174 (define_insn "@aarch64_pred_cmp<cmp_op><mode>"
8175 [(set (match_operand:<VPRED> 0 "register_operand")
8177 [(match_operand:<VPRED> 1 "register_operand")
8178 (match_operand:SI 2 "aarch64_sve_ptrue_flag")
8179 (SVE_INT_CMP:<VPRED>
8180 (match_operand:SVE_I 3 "register_operand")
8181 (match_operand:SVE_I 4 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8183 (clobber (reg:CC_NZC CC_REGNUM))]
8185 {@ [ cons: =0 , 1 , 3 , 4 ; attrs: pred_clobber ]
8186 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #%4
8187 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8188 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8189 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
8190 [ ?Upl , 0 , w , w ; yes ] ^
8191 [ Upa , Upl, w , w ; no ] ^
8195 ;; Predicated integer comparisons in which both the flag and predicate
8196 ;; results are interesting.
8197 (define_insn_and_rewrite "*cmp<cmp_op><mode>_cc"
8198 [(set (reg:CC_NZC CC_REGNUM)
8200 [(match_operand:VNx16BI 1 "register_operand")
8202 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8205 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8206 (SVE_INT_CMP:<VPRED>
8207 (match_operand:SVE_I 2 "register_operand")
8208 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8211 (set (match_operand:<VPRED> 0 "register_operand")
8215 (SVE_INT_CMP:<VPRED>
8220 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8221 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: pred_clobber ]
8222 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
8223 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8224 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8225 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
8226 [ ?Upl , 0 , w , w ; yes ] ^
8227 [ Upa , Upl, w , w ; no ] ^
8229 "&& !rtx_equal_p (operands[4], operands[6])"
8231 operands[6] = copy_rtx (operands[4]);
8232 operands[7] = operands[5];
;; Predicated integer comparisons in which only the flags result is
;; interesting.
8238 (define_insn_and_rewrite "*cmp<cmp_op><mode>_ptest"
8239 [(set (reg:CC_NZC CC_REGNUM)
8241 [(match_operand:VNx16BI 1 "register_operand")
8243 (match_operand:SI 5 "aarch64_sve_ptrue_flag")
8246 (match_operand:SI 7 "aarch64_sve_ptrue_flag")
8247 (SVE_INT_CMP:<VPRED>
8248 (match_operand:SVE_I 2 "register_operand")
8249 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand"))]
8252 (clobber (match_scratch:<VPRED> 0))]
8254 && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
8255 {@ [ cons: =0, 1 , 2 , 3 ; attrs: pred_clobber ]
8256 [ &Upa , Upl, w , <sve_imm_con>; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, #%3
8257 [ ?Upl , 0 , w , <sve_imm_con>; yes ] ^
8258 [ Upa , Upl, w , <sve_imm_con>; no ] ^
8259 [ &Upa , Upl, w , w ; yes ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>
8260 [ ?Upl , 0 , w , w ; yes ] ^
8261 [ Upa , Upl, w , w ; no ] ^
8263 "&& !rtx_equal_p (operands[4], operands[6])"
8265 operands[6] = copy_rtx (operands[4]);
8266 operands[7] = operands[5];
8270 ;; Predicated integer comparisons, formed by combining a PTRUE-predicated
8271 ;; comparison with an AND. Split the instruction into its preferred form
;; at the earliest opportunity, in order to get rid of the redundant
;; operand 1.
8274 (define_insn_and_split "*cmp<cmp_op><mode>_and"
8275 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
8279 (const_int SVE_KNOWN_PTRUE)
8280 (SVE_INT_CMP:<VPRED>
8281 (match_operand:SVE_I 2 "register_operand" "w, w")
8282 (match_operand:SVE_I 3 "aarch64_sve_cmp_<sve_imm_con>_operand" "<sve_imm_con>, w"))]
8284 (match_operand:<VPRED> 1 "register_operand" "Upl, Upl")))
8285 (clobber (reg:CC_NZC CC_REGNUM))]
8293 (const_int SVE_MAYBE_NOT_PTRUE)
8294 (SVE_INT_CMP:<VPRED>
8298 (clobber (reg:CC_NZC CC_REGNUM))])]
;; Predicated integer wide comparisons.
(define_insn "@aarch64_pred_cmp<cmp_op><mode>_wide"
  [(set (match_operand:<VPRED> 0 "register_operand")
	(unspec:<VPRED>
	  [(match_operand:VNx16BI 1 "register_operand")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand:SVE_FULL_BHSI 3 "register_operand")
	      (match_operand:VNx2DI 4 "register_operand")]
	     SVE_COND_INT_CMP_WIDE)]
	  UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  {@ [ cons: =0, 1  , 2, 3, 4; attrs: pred_clobber ]
     [ &Upa   , Upl,  , w, w; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.d
     [ ?Upl   , 0  ,  , w, w; yes                 ] ^
     [ Upa    , Upl,  , w, w; no                  ] ^
  }
)
;; Predicated integer wide comparisons in which both the flag and
;; predicate results are interesting.
(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand:VNx16BI 6 "register_operand")
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (unspec:<VPRED>
		[(match_operand:SVE_FULL_BHSI 2 "register_operand")
		 (match_operand:VNx2DI 3 "register_operand")]
		SVE_COND_INT_CMP_WIDE)]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand")
	(unspec:<VPRED>
	  [(match_dup 6)
	   (match_dup 7)
	   (unspec:<VPRED>
	     [(match_dup 2)
	      (match_dup 3)]
	     SVE_COND_INT_CMP_WIDE)]
	  UNSPEC_PRED_Z))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  {@ [ cons: =0, 1  , 2, 3, 6  ; attrs: pred_clobber ]
     [ &Upa   , Upl, w, w, Upl; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
     [ ?Upl   , 0  , w, w, Upl; yes                 ] ^
     [ Upa    , Upl, w, w, Upl; no                  ] ^
  }
)
;; Predicated integer wide comparisons in which only the flags result
;; is interesting.
(define_insn "*aarch64_pred_cmp<cmp_op><mode>_wide_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand:VNx16BI 1 "register_operand")
	   (match_operand 4)
	   (match_operand:SI 5 "aarch64_sve_ptrue_flag")
	   (unspec:<VPRED>
	     [(match_operand:VNx16BI 6 "register_operand")
	      (match_operand:SI 7 "aarch64_sve_ptrue_flag")
	      (unspec:<VPRED>
		[(match_operand:SVE_FULL_BHSI 2 "register_operand")
		 (match_operand:VNx2DI 3 "register_operand")]
		SVE_COND_INT_CMP_WIDE)]
	     UNSPEC_PRED_Z)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0))]
  "TARGET_SVE
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  {@ [ cons: =0, 1  , 2, 3, 6  ; attrs: pred_clobber ]
     [ &Upa   , Upl, w, w, Upl; yes                 ] cmp<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.d
     [ ?Upl   , 0  , w, w, Upl; yes                 ] ^
     [ Upa    , Upl, w, w, Upl; no                  ] ^
  }
)
;; -------------------------------------------------------------------------
;; ---- [INT] While tests
;; -------------------------------------------------------------------------
;; Includes:
;; - WHILEGE (SVE2)
;; - WHILEGT (SVE2)
;; - WHILEHI (SVE2)
;; - WHILEHS (SVE2)
;; - WHILELE
;; - WHILELO
;; - WHILELS
;; - WHILELT
;; - WHILERW (SVE2)
;; - WHILEWR (SVE2)
;; -------------------------------------------------------------------------
;; Set element I of the result if (cmp (plus operand1 J) operand2) is
;; true for all J in [0, I].
(define_insn "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(const_int SVE_WHILE_B)
			  (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
			  (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
			 SVE_WHILE))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
)
;; The WHILE instructions set the flags in the same way as a PTEST with
;; a PTRUE GP.  Handle the case in which both results are useful.  The GP
;; operands to the PTEST aren't needed, so we allow them to be anything.
(define_insn_and_rewrite "*while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 3)
	   (match_operand 4)
	   (const_int SVE_KNOWN_PTRUE)
	   (unspec:PRED_ALL
	     [(const_int SVE_WHILE_B)
	      (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
	      (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
	     SVE_WHILE)]
	  UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(const_int SVE_WHILE_B)
			  (match_dup 1)
			  (match_dup 2)]
			 SVE_WHILE))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
  {
    operands[3] = CONSTM1_RTX (VNx16BImode);
    operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
;; Same, but handle the case in which only the flags result is useful.
(define_insn_and_rewrite "@while_<while_optab_cmp><GPI:mode><PRED_ALL:mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC
	  [(match_operand 3)
	   (match_operand 4)
	   (const_int SVE_KNOWN_PTRUE)
	   (unspec:PRED_ALL
	     [(const_int SVE_WHILE_B)
	      (match_operand:GPI 1 "aarch64_reg_or_zero" "rZ")
	      (match_operand:GPI 2 "aarch64_reg_or_zero" "rZ")]
	     SVE_WHILE)]
	  UNSPEC_PTEST))
   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  "TARGET_SVE"
  "while<cmp_op>\t%0.<PRED_ALL:Vetype>, %<w>1, %<w>2"
  ;; Force the compiler to drop the unused predicate operand, so that we
  ;; don't have an unnecessary PTRUE.
  "&& (!CONSTANT_P (operands[3]) || !CONSTANT_P (operands[4]))"
  {
    operands[3] = CONSTM1_RTX (VNx16BImode);
    operands[4] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)
;; -------------------------------------------------------------------------
;; ---- [FP] Direct comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - FCMEQ
;; - FCMGE
;; - FCMGT
;; - FCMLE
;; - FCMLT
;; - FCMNE
;; - FCMUO
;; -------------------------------------------------------------------------
;; Floating-point comparisons.  All comparisons except FCMUO allow a zero
;; operand; aarch64_expand_sve_vec_cmp_float handles the case of an FCMUO
;; with zero.
(define_expand "vec_cmp<mode><vpred>"
  [(set (match_operand:<VPRED> 0 "register_operand")
	(match_operator:<VPRED> 1 "comparison_operator"
	  [(match_operand:SVE_FULL_F 2 "register_operand")
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]))]
  "TARGET_SVE"
  {
    aarch64_expand_sve_vec_cmp_float (operands[0], GET_CODE (operands[1]),
				      operands[2], operands[3], false);
    DONE;
  }
)
;; Predicated floating-point comparisons.
(define_insn "@aarch64_pred_fcm<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (match_operand:SVE_FULL_F 3 "register_operand")
	   (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
	  SVE_COND_FP_CMP_I0))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1   , 3 , 4  ]
     [ Upa      , Upl , w , Dz ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, #0.0
     [ Upa      , Upl , w , w  ] fcm<cmp_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
  }
)
;; Same for unordered comparisons.
(define_insn "@aarch64_pred_fcmuo<mode>"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (match_operand:SVE_FULL_F 3 "register_operand" "w")
	   (match_operand:SVE_FULL_F 4 "register_operand" "w")]
	  UNSPEC_COND_FCMUO))]
  "TARGET_SVE"
  "fcmuo\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>"
)
;; Floating-point comparisons predicated on a PTRUE, with the results ANDed
;; with another predicate P.  This does not have the same trapping behavior
;; as predicating the comparison itself on P, but it's a legitimate fold,
;; since we can drop any potentially-trapping operations whose results
;; are not needed.
;;
;; Split the instruction into its preferred form (below) at the earliest
;; opportunity, in order to get rid of the redundant operand 1.
(define_insn_and_split "*fcm<cmp_op><mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa, Upa")
	(and:<VPRED>
	  (unspec:<VPRED>
	    [(match_operand:<VPRED> 1)
	     (const_int SVE_KNOWN_PTRUE)
	     (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
	     (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
	    SVE_COND_FP_CMP_I0)
	  (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:<VPRED>
	  [(match_dup 4)
	   (const_int SVE_MAYBE_NOT_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_COND_FP_CMP_I0))]
)
;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_and_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (unspec:<VPRED>
	    [(match_operand:<VPRED> 1)
	     (const_int SVE_KNOWN_PTRUE)
	     (match_operand:SVE_FULL_F 2 "register_operand" "w")
	     (match_operand:SVE_FULL_F 3 "register_operand" "w")]
	    UNSPEC_COND_FCMUO)
	  (match_operand:<VPRED> 4 "register_operand" "Upl")))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 0)
	(unspec:<VPRED>
	  [(match_dup 4)
	   (const_int SVE_MAYBE_NOT_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_COND_FCMUO))]
)
;; Similar to *fcm<cmp_op><mode>_and_combine, but for BIC rather than AND.
;; In this case, we still need a separate NOT/BIC operation, but predicating
;; the comparison on the BIC operand removes the need for a PTRUE.
(define_insn_and_split "*fcm<cmp_op><mode>_bic_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (unspec:<VPRED>
		[(match_operand:<VPRED> 1)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:SVE_FULL_F 2 "register_operand" "w")
		 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
		SVE_COND_FP_CMP_I0))
	    (match_operand:<VPRED> 4 "register_operand" "Upa"))
	  (match_dup:<VPRED> 1)))
   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 5)
	(unspec:<VPRED>
	  [(match_dup 4)
	   (const_int SVE_MAYBE_NOT_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_COND_FP_CMP_I0))
   (set (match_dup 0)
	(and:<VPRED>
	  (not:<VPRED> (match_dup 5))
	  (match_dup 4)))]
{
  if (can_create_pseudo_p ())
    operands[5] = gen_reg_rtx (<VPRED>mode);
}
)
;; Make sure that we expand to a NOR when operand 4 of
;; *fcm<cmp_op><mode>_bic_combine is a NOT.
(define_insn_and_split "*fcm<cmp_op><mode>_nor_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (unspec:<VPRED>
		[(match_operand:<VPRED> 1)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:SVE_FULL_F 2 "register_operand" "w")
		 (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
		SVE_COND_FP_CMP_I0))
	    (not:<VPRED>
	      (match_operand:<VPRED> 4 "register_operand" "Upa")))
	  (match_dup:<VPRED> 1)))
   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 5)
	(unspec:<VPRED>
	  [(match_dup 1)
	   (const_int SVE_KNOWN_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  SVE_COND_FP_CMP_I0))
   (set (match_dup 0)
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED> (match_dup 5))
	    (not:<VPRED> (match_dup 4)))
	  (match_dup 1)))]
{
  if (can_create_pseudo_p ())
    operands[5] = gen_reg_rtx (<VPRED>mode);
}
)
8664 (define_insn_and_split "*fcmuo<mode>_bic_combine"
8665 [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
8670 [(match_operand:<VPRED> 1)
8671 (const_int SVE_KNOWN_PTRUE)
8672 (match_operand:SVE_FULL_F 2 "register_operand" "w")
8673 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
8675 (match_operand:<VPRED> 4 "register_operand" "Upa"))
8676 (match_dup:<VPRED> 1)))
8677 (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
8684 (const_int SVE_MAYBE_NOT_PTRUE)
8694 if (can_create_pseudo_p ())
8695 operands[5] = gen_reg_rtx (<VPRED>mode);
;; Same for unordered comparisons.
(define_insn_and_split "*fcmuo<mode>_nor_combine"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED>
	      (unspec:<VPRED>
		[(match_operand:<VPRED> 1)
		 (const_int SVE_KNOWN_PTRUE)
		 (match_operand:SVE_FULL_F 2 "register_operand" "w")
		 (match_operand:SVE_FULL_F 3 "register_operand" "w")]
		UNSPEC_COND_FCMUO))
	    (not:<VPRED>
	      (match_operand:<VPRED> 4 "register_operand" "Upa")))
	  (match_dup:<VPRED> 1)))
   (clobber (match_scratch:<VPRED> 5 "=&Upl"))]
  "TARGET_SVE"
  "#"
  "&& 1"
  [(set (match_dup 5)
	(unspec:<VPRED>
	  [(match_dup 1)
	   (const_int SVE_KNOWN_PTRUE)
	   (match_dup 2)
	   (match_dup 3)]
	  UNSPEC_COND_FCMUO))
   (set (match_dup 0)
	(and:<VPRED>
	  (and:<VPRED>
	    (not:<VPRED> (match_dup 5))
	    (not:<VPRED> (match_dup 4)))
	  (match_dup 1)))]
{
  if (can_create_pseudo_p ())
    operands[5] = gen_reg_rtx (<VPRED>mode);
}
)
;; -------------------------------------------------------------------------
;; ---- [FP] Absolute comparisons
;; -------------------------------------------------------------------------
;; Includes:
;; - FACGE
;; - FACGT
;; - FACLE
;; - FACLT
;; -------------------------------------------------------------------------
;; Predicated floating-point absolute comparisons.
(define_expand "@aarch64_pred_fac<cmp_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand")
	   (match_operand:SI 2 "aarch64_sve_ptrue_flag")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_dup 2)
	      (match_operand:SVE_FULL_F 3 "register_operand")]
	     UNSPEC_COND_FABS)
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_dup 2)
	      (match_operand:SVE_FULL_F 4 "register_operand")]
	     UNSPEC_COND_FABS)]
	  SVE_COND_FP_ABS_CMP))]
  "TARGET_SVE"
)
(define_insn_and_rewrite "*aarch64_pred_fac<cmp_op><mode>_relaxed"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:SVE_FULL_F
	     [(match_operand 5)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand" "w")]
	     UNSPEC_COND_FABS)
	   (unspec:SVE_FULL_F
	     [(match_operand 6)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 3 "register_operand" "w")]
	     UNSPEC_COND_FABS)]
	  SVE_COND_FP_ABS_CMP))]
  "TARGET_SVE"
  "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& (!rtx_equal_p (operands[1], operands[5])
       || !rtx_equal_p (operands[1], operands[6]))"
  {
    operands[5] = copy_rtx (operands[1]);
    operands[6] = copy_rtx (operands[1]);
  }
)
(define_insn "*aarch64_pred_fac<cmp_op><mode>_strict"
  [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
	(unspec:<VPRED>
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SI 4 "aarch64_sve_ptrue_flag")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_operand:SI 5 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_F 2 "register_operand" "w")]
	     UNSPEC_COND_FABS)
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (match_operand:SI 6 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_F 3 "register_operand" "w")]
	     UNSPEC_COND_FABS)]
	  SVE_COND_FP_ABS_CMP))]
  "TARGET_SVE"
  "fac<cmp_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [PRED] Select
;; -------------------------------------------------------------------------
;; Includes:
;; - SEL
;; -------------------------------------------------------------------------

(define_insn "@vcond_mask_<mode><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(ior:PRED_ALL
	  (and:PRED_ALL
	    (match_operand:PRED_ALL 3 "register_operand" "Upa")
	    (match_operand:PRED_ALL 1 "register_operand" "Upa"))
	  (and:PRED_ALL
	    (not:PRED_ALL (match_dup 3))
	    (match_operand:PRED_ALL 2 "register_operand" "Upa"))))]
  "TARGET_SVE"
  "sel\t%0.b, %3, %1.b, %2.b"
)
;; -------------------------------------------------------------------------
;; ---- [PRED] Test bits
;; -------------------------------------------------------------------------
;; Includes:
;; - PTEST
;; -------------------------------------------------------------------------

;; Branch based on predicate equality or inequality.
(define_expand "cbranch<mode>4"
  [(set (pc)
	(if_then_else
	  (match_operator 0 "aarch64_equality_operator"
	    [(match_operand:PRED_ALL 1 "register_operand")
	     (match_operand:PRED_ALL 2 "aarch64_simd_reg_or_zero")])
	  (label_ref (match_operand 3 ""))
	  (pc)))]
  ""
  {
    rtx ptrue = force_reg (VNx16BImode, aarch64_ptrue_all (<data_bytes>));
    rtx cast_ptrue = gen_lowpart (<MODE>mode, ptrue);
    rtx ptrue_flag = gen_int_mode (SVE_KNOWN_PTRUE, SImode);
    rtx pred;
    if (operands[2] == CONST0_RTX (<MODE>mode))
      pred = operands[1];
    else
      {
	pred = gen_reg_rtx (<MODE>mode);
	emit_insn (gen_aarch64_pred_xor<mode>_z (pred, cast_ptrue, operands[1],
						 operands[2]));
      }
    emit_insn (gen_aarch64_ptest<mode> (ptrue, cast_ptrue, ptrue_flag, pred));
    operands[1] = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
    operands[2] = const0_rtx;
  }
)
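;; A rough sketch of the code this expansion leads to for (p1 != p2),
;; assuming .b predicates (illustrative only; the exact sequence depends
;; on the mode and on register allocation):
;;   ptrue   p3.b
;;   eor     p4.b, p3/z, p1.b, p2.b
;;   ptest   p3, p4.b
;;   b.any   .Llabel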
;; See "Description of UNSPEC_PTEST" above for details.
(define_insn "aarch64_ptest<mode>"
  [(set (reg:CC_NZC CC_REGNUM)
	(unspec:CC_NZC [(match_operand:VNx16BI 0 "register_operand" "Upa")
			(match_operand 1)
			(match_operand:SI 2 "aarch64_sve_ptrue_flag")
			(match_operand:PRED_ALL 3 "register_operand" "Upa")]
		       UNSPEC_PTEST))]
  "TARGET_SVE"
  "ptest\t%0, %3.b"
)
;; =========================================================================
;; == Reductions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Conditional reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - CLASTA
;; - CLASTB
;; -------------------------------------------------------------------------
;; Set operand 0 to the last active element in operand 3, or to tied
;; operand 1 if no elements are active.
(define_insn "@fold_extract_<last_op>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL>
	  [(match_operand:<VEL> 1 "register_operand")
	   (match_operand:<VPRED> 2 "register_operand")
	   (match_operand:SVE_FULL 3 "register_operand")]
	  CLAST))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2   , 3 ]
     [ ?r       , 0 , Upl , w ] clast<ab>\t%<vwcore>0, %2, %<vwcore>0, %3.<Vetype>
     [ w        , 0 , Upl , w ] clast<ab>\t%<Vetype>0, %2, %<Vetype>0, %3.<Vetype>
  }
)
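;; Illustrative example (not compiler output): with p2.s = { 0, 1, 1, 0 },
;;   clastb w0, p2, w0, z3.s
;; writes lane 2 of z3 (the last active element) to w0; if p2 were all
;; false, w0 would keep its original value, matching the tied operand 1.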
(define_insn "@aarch64_fold_extract_vector_<last_op>_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand")
	   (match_operand:<VPRED> 2 "register_operand")
	   (match_operand:SVE_FULL 3 "register_operand")]
	  CLAST))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2   , 3 ]
     [ w        , 0 , Upl , w ] clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
     [ ?&w      , w , Upl , w ] movprfx\t%0, %1\;clast<ab>\t%0.<Vetype>, %2, %0.<Vetype>, %3.<Vetype>
  }
)
;; -------------------------------------------------------------------------
;; ---- [INT] Tree reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - ANDV
;; - EORV
;; - ORV
;; - SADDV
;; - SMAXV
;; - SMINV
;; - UADDV
;; - UMAXV
;; - UMINV
;; -------------------------------------------------------------------------

;; Unpredicated integer add reduction.
(define_expand "reduc_plus_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (match_operand:SVE_FULL_I 1 "register_operand")]
  "TARGET_SVE"
  {
    rtx pred = aarch64_ptrue_reg (<VPRED>mode);
    rtx tmp = <VEL>mode == DImode ? operands[0] : gen_reg_rtx (DImode);
    emit_insn (gen_aarch64_pred_reduc_uadd_<mode> (tmp, pred, operands[1]));
    if (tmp != operands[0])
      emit_move_insn (operands[0], gen_lowpart (<VEL>mode, tmp));
    DONE;
  }
)
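;; For example (illustrative only), for a VNx16QI input the expansion
;; emits a UADDV that sums the active byte elements into a 64-bit result:
;;   uaddv   d0, p0, z1.b
;; and then extracts the low <VEL> bits of that DImode value.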
;; Predicated integer add reduction.  The result is always 64-bits.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:DI 0 "register_operand" "=w")
	(unspec:DI [(match_operand:<VPRED> 1 "register_operand" "Upl")
		    (match_operand:SVE_FULL_I 2 "register_operand" "w")]
		   SVE_INT_ADDV))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<su>addv\t%d0, %1, %2.<Vetype>"
)

;; Unpredicated integer reductions.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_FULL_I 1 "register_operand")]
		      SVE_INT_REDUCTION))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated integer reductions.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_FULL_I 2 "register_operand" "w")]
		      SVE_INT_REDUCTION))]
  "TARGET_SVE"
  "<sve_int_op>\t%<Vetype>0, %1, %2.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [FP] Tree reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDV
;; - FMAXNMV
;; - FMAXV
;; - FMINNMV
;; - FMINV
;; -------------------------------------------------------------------------

;; Unpredicated floating-point tree reductions.
(define_expand "reduc_<optab>_scal_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 2)
		       (match_operand:SVE_FULL_F 1 "register_operand")]
		      SVE_FP_REDUCTION))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

(define_expand "reduc_<fmaxmin>_scal_<mode>"
  [(match_operand:<VEL> 0 "register_operand")
   (unspec:<VEL> [(match_operand:SVE_FULL_F 1 "register_operand")]
		 FMAXMINNMV)]
  "TARGET_SVE"
  {
    emit_insn (gen_reduc_<optab>_scal_<mode> (operands[0], operands[1]));
    DONE;
  }
)

;; Predicated floating-point tree reductions.
(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 1 "register_operand" "Upl")
		       (match_operand:SVE_FULL_F 2 "register_operand" "w")]
		      SVE_FP_REDUCTION))]
  "TARGET_SVE"
  "<sve_fp_op>\t%<Vetype>0, %1, %2.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [FP] Left-to-right reductions
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDA
;; -------------------------------------------------------------------------

;; Unpredicated in-order FP reductions.
(define_expand "fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand")
	(unspec:<VEL> [(match_dup 3)
		       (match_operand:<VEL> 1 "register_operand")
		       (match_operand:SVE_FULL_F 2 "register_operand")]
		      UNSPEC_FADDA))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);
  }
)

;; Predicated in-order FP reductions.
(define_insn "mask_fold_left_plus_<mode>"
  [(set (match_operand:<VEL> 0 "register_operand" "=w")
	(unspec:<VEL> [(match_operand:<VPRED> 3 "register_operand" "Upl")
		       (match_operand:<VEL> 1 "register_operand" "0")
		       (match_operand:SVE_FULL_F 2 "register_operand" "w")]
		      UNSPEC_FADDA))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  "fadda\t%<Vetype>0, %3, %<Vetype>0, %2.<Vetype>"
)
;; =========================================================================
;; == Permutes
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL
;; -------------------------------------------------------------------------

(define_expand "vec_perm<mode>"
  [(match_operand:SVE_FULL 0 "register_operand")
   (match_operand:SVE_FULL 1 "register_operand")
   (match_operand:SVE_FULL 2 "register_operand")
   (match_operand:<V_INT_EQUIV> 3 "aarch64_sve_vec_perm_operand")]
  "TARGET_SVE && GET_MODE_NUNITS (<MODE>mode).is_constant ()"
  {
    aarch64_expand_sve_vec_perm (operands[0], operands[1],
				 operands[2], operands[3]);
    DONE;
  }
)

(define_insn "@aarch64_sve_<perm_insn><mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "w")
	   (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
	  UNSPEC_TBL))]
  "TARGET_SVE"
  "<perm_insn>\t%0.<Vetype>, {%1.<Vetype>}, %2.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [INT,FP] Special-purpose unary permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - COMPACT
;; - DUP
;; - REV
;; -------------------------------------------------------------------------

;; Compact active elements and pad with zeros.
(define_insn "@aarch64_sve_compact<mode>"
  [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
	(unspec:SVE_FULL_SD
	  [(match_operand:<VPRED> 1 "register_operand" "Upl")
	   (match_operand:SVE_FULL_SD 2 "register_operand" "w")]
	  UNSPEC_SVE_COMPACT))]
  "TARGET_SVE && TARGET_NON_STREAMING"
  "compact\t%0.<Vetype>, %1, %2.<Vetype>"
)
;; Duplicate one element of a vector.
(define_insn "@aarch64_sve_dup_lane<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(vec_duplicate:SVE_ALL
	  (vec_select:<VEL>
	    (match_operand:SVE_ALL 1 "register_operand" "w")
	    (parallel [(match_operand:SI 2 "const_int_operand")]))))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[2]) * <container_bits> / 8, 0, 63)"
  "dup\t%0.<Vctype>, %1.<Vctype>[%2]"
)
;; Use DUP.Q to duplicate a 128-bit segment of a register.
;;
;; The vec_select:<V128> sets memory lane number N of the V128 to lane
;; number op2 + N of op1.  (We don't need to distinguish between memory
;; and architectural register lane numbering for op1 or op0, since the
;; two numbering schemes are the same for SVE.)
;;
;; The vec_duplicate:SVE_FULL then copies memory lane number N of the
;; V128 (and thus lane number op2 + N of op1) to lane numbers N + I * STEP
;; of op0.  We therefore get the correct result for both endiannesses.
;;
;; The wrinkle is that for big-endian V128 registers, memory lane numbering
;; is in the opposite order to architectural register lane numbering.
;; Thus if we were to do this operation via a V128 temporary register,
;; the vec_select and vec_duplicate would both involve a reverse operation
;; for big-endian targets.  In this fused pattern the two reverses cancel
;; each other out.
(define_insn "@aarch64_sve_dupq_lane<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(vec_duplicate:SVE_FULL
	  (vec_select:<V128>
	    (match_operand:SVE_FULL 1 "register_operand" "w")
	    (match_operand 2 "ascending_int_parallel"))))]
  "TARGET_SVE
   && (INTVAL (XVECEXP (operands[2], 0, 0))
       * GET_MODE_SIZE (<VEL>mode)) % 16 == 0
   && IN_RANGE (INTVAL (XVECEXP (operands[2], 0, 0))
		* GET_MODE_SIZE (<VEL>mode), 0, 63)"
  {
    unsigned int byte = (INTVAL (XVECEXP (operands[2], 0, 0))
			 * GET_MODE_SIZE (<VEL>mode));
    operands[2] = gen_int_mode (byte / 16, DImode);
    return "dup\t%0.q, %1.q[%2]";
  }
)
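;; Illustrative example (not compiler output): for VNx4SF with starting
;; lane 4, the byte offset is 4 * 4 = 16, so the pattern emits
;;   dup z0.q, z1.q[1]
;; which copies lanes 4-7 of z1 to every 128-bit chunk of z0.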
;; Reverse the order of elements within a full vector.
(define_insn "@aarch64_sve_rev<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:SVE_ALL 1 "register_operand" "w")]
	  UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vctype>, %1.<Vctype>")
;; -------------------------------------------------------------------------
;; ---- [INT,FP] Special-purpose binary permutes
;; -------------------------------------------------------------------------

;; Like EXT, but start at the first active element.
(define_insn "@aarch64_sve_splice<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
	(unspec:SVE_FULL
	  [(match_operand:<VPRED> 1 "register_operand")
	   (match_operand:SVE_FULL 2 "register_operand")
	   (match_operand:SVE_FULL 3 "register_operand")]
	  UNSPEC_SVE_SPLICE))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
     [ w        , Upl , 0 , w ; *              ] splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;splice\t%0.<Vetype>, %1, %0.<Vetype>, %3.<Vetype>
  }
)
;; Permutes that take half the elements from one vector and half the
;; elements from the other.
(define_insn "@aarch64_sve_<perm_insn><mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w")
	(unspec:SVE_ALL
	  [(match_operand:SVE_ALL 1 "register_operand" "w")
	   (match_operand:SVE_ALL 2 "register_operand" "w")]
	  PERMUTE))]
  "TARGET_SVE"
  "<perm_insn>\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
)

;; Apply PERMUTE to 128-bit sequences.  The behavior of these patterns
;; doesn't depend on the mode.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
	(unspec:SVE_FULL
	  [(match_operand:SVE_FULL 1 "register_operand" "w")
	   (match_operand:SVE_FULL 2 "register_operand" "w")]
	  PERMUTEQ))]
  "TARGET_F64MM"
  "<perm_insn>\t%0.q, %1.q, %2.q"
)
;; Concatenate two vectors and extract a subvector.  Note that the
;; immediate (third) operand is the lane index not the byte index.
(define_insn "@aarch64_sve_ext<mode>"
  [(set (match_operand:SVE_ALL 0 "register_operand" "=w, ?&w")
	(unspec:SVE_ALL
	  [(match_operand:SVE_ALL 1 "register_operand" "0, w")
	   (match_operand:SVE_ALL 2 "register_operand" "w, w")
	   (match_operand:SI 3 "const_int_operand")]
	  UNSPEC_EXT))]
  "TARGET_SVE
   && IN_RANGE (INTVAL (operands[3]) * <container_bits> / 8, 0, 255)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * <container_bits> / 8);
    return (which_alternative == 0
	    ? "ext\\t%0.b, %0.b, %2.b, #%3"
	    : "movprfx\t%0, %1\;ext\\t%0.b, %0.b, %2.b, #%3");
  }
  [(set_attr "movprfx" "*,yes")]
)
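;; Illustrative example (not compiler output): for .s elements, lane
;; index 3 becomes byte offset 3 * 4 = 12, so the pattern emits
;;   ext z0.b, z0.b, z2.b, #12
;; which extracts a full vector starting at byte 12 of the
;; concatenation of z0 and z2.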
;; -------------------------------------------------------------------------
;; ---- [PRED] Special-purpose unary permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - REV
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_rev<mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")]
			 UNSPEC_REV))]
  "TARGET_SVE"
  "rev\t%0.<Vetype>, %1.<Vetype>")
;; -------------------------------------------------------------------------
;; ---- [PRED] Special-purpose binary permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TRN1
;; - TRN2
;; - UZP1
;; - UZP2
;; - ZIP1
;; - ZIP2
;; -------------------------------------------------------------------------

;; Permutes that take half the elements from one vector and half the
;; elements from the other.
(define_insn "@aarch64_sve_<perm_insn><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
	(unspec:PRED_ALL [(match_operand:PRED_ALL 1 "register_operand" "Upa")
			  (match_operand:PRED_ALL 2 "register_operand" "Upa")]
			 PERMUTE))]
  "TARGET_SVE"
  "<perm_insn>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Special purpose permute used by the predicate generation instructions.
;; Unlike the normal permute patterns, these instructions operate on VNx16BI
;; regardless of the element size, so that all input and output bits are
;; well-defined.  Operand 3 then indicates the size of the permute.
(define_insn "@aarch64_sve_trn1_conv<mode>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
	(unspec:VNx16BI [(match_operand:VNx16BI 1 "register_operand" "Upa")
			 (match_operand:VNx16BI 2 "register_operand" "Upa")
			 (match_operand:PRED_ALL 3 "aarch64_simd_imm_zero")]
			UNSPEC_TRN1_CONV))]
  "TARGET_SVE"
  "trn1\t%0.<PRED_ALL:Vetype>, %1.<PRED_ALL:Vetype>, %2.<PRED_ALL:Vetype>"
)
;; =========================================================================
;; == Conversions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT<-INT] Packs
;; -------------------------------------------------------------------------
;; Includes:
;; - UZP1
;; -------------------------------------------------------------------------

;; Integer pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand" "=w")
	(unspec:SVE_FULL_BHSI
	  [(match_operand:<VWIDE> 1 "register_operand" "w")
	   (match_operand:<VWIDE> 2 "register_operand" "w")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
;; Integer partial pack, packing two partial SVE types into a single full
;; SVE type of the same element type.  Use UZP1 on the wider type, which
;; discards the high part of each wide element.  This allows us to
;; concatenate SVE partial types into a wider vector.
(define_insn "@aarch64_pack_partial<mode>"
  [(set (match_operand:SVE_NO2E 0 "register_operand" "=w")
	(vec_concat:SVE_NO2E
	  (match_operand:<VHALF> 1 "register_operand" "w")
	  (match_operand:<VHALF> 2 "register_operand" "w")))]
  "TARGET_SVE"
  "uzp1\t%0.<Vctype>, %1.<Vctype>, %2.<Vctype>"
)
;; -------------------------------------------------------------------------
;; ---- [INT<-INT] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - SUNPKHI
;; - SUNPKLO
;; - UUNPKHI
;; - UUNPKLO
;; -------------------------------------------------------------------------

;; Unpack the low or high half of a vector, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<SVE_FULL_BHSI:mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE>
     [(match_operand:SVE_FULL_BHSI 1 "register_operand")] UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_<su>unpkhi_<SVE_FULL_BHSI:mode>
		: gen_aarch64_sve_<su>unpklo_<SVE_FULL_BHSI:mode>)
	       (operands[0], operands[1]));
    DONE;
  }
)

(define_insn "@aarch64_sve_<su>unpk<perm_hilo>_<SVE_FULL_BHSI:mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
	(unspec:<VWIDE>
	  [(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")]
	  UNPACK))]
  "TARGET_SVE"
  "<su>unpk<perm_hilo>\t%0.<Vewtype>, %1.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVTZS
;; - FCVTZU
;; -------------------------------------------------------------------------

;; Unpredicated conversion of floats to integers of the same size (HF to HI,
;; SF to SI or DF to DI).
(define_expand "<optab><mode><v_int_equiv>2"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
	(unspec:<V_INT_EQUIV>
	  [(match_dup 2)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:SVE_FULL_F 1 "register_operand")]
	  SVE_COND_FCVTI))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
;; Predicated float-to-integer conversion, either to the same width or wider.
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_F 2 "register_operand")]
	  SVE_COND_FCVTI))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
     [ w        , Upl , 0 ; *              ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
  }
)

;; Predicated narrowing float-to-integer conversion.
(define_insn "@aarch64_sve_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
	(unspec:VNx4SI_ONLY
	  [(match_operand:VNx2BI 1 "register_operand")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:VNx2DF_ONLY 2 "register_operand")]
	  SVE_COND_FCVTI))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
     [ w        , Upl , 0 ; *              ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
  }
)
;; Predicated float-to-integer conversion with merging, either to the same
;; width or wider.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSDI
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE_COND_FCVTI)
	   (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
)

;; The first alternative doesn't need the earlyclobber, but the only case
;; it would help is the uninteresting one in which operands 2 and 3 are
;; the same register (despite having different modes).  Making all the
;; alternatives earlyclobber makes things more consistent for the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_relaxed"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSDI
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE_COND_FCVTI)
	   (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ &w       , Upl , w , 0  ; *              ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
     [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<optab>_nontrunc<SVE_FULL_F:mode><SVE_FULL_HSDI:mode>_strict"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
	(unspec:SVE_FULL_HSDI
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSDI
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_F 2 "register_operand")]
	     SVE_COND_FCVTI)
	   (match_operand:SVE_FULL_HSDI 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ &w       , Upl , w , 0  ; *              ] fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
     [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvtz<su>\t%0.<SVE_FULL_HSDI:Vetype>, %1/m, %2.<SVE_FULL_F:Vetype>
  }
)
;; Predicated narrowing float-to-integer conversion with merging.
(define_expand "@cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
	(unspec:VNx4SI_ONLY
	  [(match_operand:VNx2BI 1 "register_operand")
	   (unspec:VNx4SI_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:VNx2DF_ONLY 2 "register_operand")]
	     SVE_COND_FCVTI)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

(define_insn "*cond_<optab>_trunc<VNx2DF_ONLY:mode><VNx4SI_ONLY:mode>"
  [(set (match_operand:VNx4SI_ONLY 0 "register_operand")
	(unspec:VNx4SI_ONLY
	  [(match_operand:VNx2BI 1 "register_operand")
	   (unspec:VNx4SI_ONLY
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:VNx2DF_ONLY 2 "register_operand")]
	     SVE_COND_FCVTI)
	   (match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ &w       , Upl , w , 0  ; *              ] fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
     [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvtz<su>\t%0.<VNx4SI_ONLY:Vetype>, %1/m, %2.<VNx2DF_ONLY:Vetype>
  }
)
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Packs
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Convert two vectors of DF to SI and pack the results into a single vector.
(define_expand "vec_pack_<su>fix_trunc_vnx2df"
  [(set (match_dup 4)
	(unspec:VNx4SI
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:VNx2DF 1 "register_operand")]
	  SVE_COND_FCVTI))
   (set (match_dup 5)
	(unspec:VNx4SI
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:VNx2DF 2 "register_operand")]
	  SVE_COND_FCVTI))
   (set (match_operand:VNx4SI 0 "register_operand")
	(unspec:VNx4SI [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (VNx2BImode);
    operands[4] = gen_reg_rtx (VNx4SImode);
    operands[5] = gen_reg_rtx (VNx4SImode);
  }
)
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Unpacks
;; -------------------------------------------------------------------------
;; No patterns here yet!
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; ---- [FP<-INT] Conversions
;; -------------------------------------------------------------------------
;; Includes:
;; - SCVTF
;; - UCVTF
;; -------------------------------------------------------------------------

;; Unpredicated conversion of integers to floats of the same size
;; (HI to HF, SI to SF or DI to DF).
(define_expand "<optab><v_int_equiv><mode>2"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_dup 2)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:<V_INT_EQUIV> 1 "register_operand")]
	  SVE_COND_ICVTF))]
  "TARGET_SVE"
  {
    operands[2] = aarch64_ptrue_reg (<VPRED>mode);
  }
)
;; Predicated integer-to-float conversion, either to the same width or
;; narrower.
(define_insn "@aarch64_sve_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_HSDI 2 "register_operand")]
	  SVE_COND_ICVTF))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
     [ w        , Upl , 0 ; *              ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
  }
)

;; Predicated widening integer-to-float conversion.
(define_insn "@aarch64_sve_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
  [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
	(unspec:VNx2DF_ONLY
	  [(match_operand:VNx2BI 1 "register_operand")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:VNx4SI_ONLY 2 "register_operand")]
	  SVE_COND_ICVTF))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
     [ w        , Upl , 0 ; *              ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
  }
)
;; Predicated integer-to-float conversion with merging, either to the same
;; width or narrower.
(define_expand "@cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand")]
	     SVE_COND_ICVTF)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
)

;; The first alternative doesn't need the earlyclobber, but the only case
;; it would help is the uninteresting one in which operands 2 and 3 are
;; the same register (despite having different modes).  Making all the
;; alternatives earlyclobber makes things more consistent for the
;; register allocator.
(define_insn_and_rewrite "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_relaxed"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_operand 4)
	      (const_int SVE_RELAXED_GP)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand")]
	     SVE_COND_ICVTF)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ &w       , Upl , w , 0  ; *              ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
     [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<optab>_nonextend<SVE_FULL_HSDI:mode><SVE_FULL_F:mode>_strict"
  [(set (match_operand:SVE_FULL_F 0 "register_operand")
	(unspec:SVE_FULL_F
	  [(match_operand:<SVE_FULL_HSDI:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_F
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_HSDI 2 "register_operand")]
	     SVE_COND_ICVTF)
	   (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_HSDI:elem_bits> >= <SVE_FULL_F:elem_bits>"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ &w       , Upl , w , 0  ; *              ] <su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
     [ &w       , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_HSDI:Vetype>, %1/z, %2.<SVE_FULL_HSDI:Vetype>\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<su>cvtf\t%0.<SVE_FULL_F:Vetype>, %1/m, %2.<SVE_FULL_HSDI:Vetype>
  }
)
;; Predicated widening integer-to-float conversion with merging.
(define_expand "@cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
  [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
	(unspec:VNx2DF_ONLY
	  [(match_operand:VNx2BI 1 "register_operand")
	   (unspec:VNx2DF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:VNx4SI_ONLY 2 "register_operand")]
	     SVE_COND_ICVTF)
	   (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
)

(define_insn "*cond_<optab>_extend<VNx4SI_ONLY:mode><VNx2DF_ONLY:mode>"
  [(set (match_operand:VNx2DF_ONLY 0 "register_operand")
	(unspec:VNx2DF_ONLY
	  [(match_operand:VNx2BI 1 "register_operand")
	   (unspec:VNx2DF_ONLY
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:VNx4SI_ONLY 2 "register_operand")]
	     SVE_COND_ICVTF)
	   (match_operand:VNx2DF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ w        , Upl , w , 0  ; *              ] <su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
     [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<VNx2DF_ONLY:Vetype>, %1/z, %2.<VNx2DF_ONLY:Vetype>\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<su>cvtf\t%0.<VNx2DF_ONLY:Vetype>, %1/m, %2.<VNx4SI_ONLY:Vetype>
  }
)
;; -------------------------------------------------------------------------
;; ---- [FP<-INT] Packs
;; -------------------------------------------------------------------------
;; No patterns here yet!
;; -------------------------------------------------------------------------

;; -------------------------------------------------------------------------
;; ---- [FP<-INT] Unpacks
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------

;; Unpack one half of a VNx4SI to VNx2DF.  First unpack from VNx4SI
;; to VNx2DI, reinterpret the VNx2DI as a VNx4SI, then convert the
;; unpacked VNx4SI to VNx2DF.
(define_expand "vec_unpack<su_optab>_float_<perm_hilo>_vnx4si"
  [(match_operand:VNx2DF 0 "register_operand")
   (FLOATUORS:VNx2DF
     (unspec:VNx2DI [(match_operand:VNx4SI 1 "register_operand")]
		    UNPACK_UNSIGNED))]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (VNx4SImode);
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_zip2vnx4si
		: gen_aarch64_sve_zip1vnx4si)
	       (temp, operands[1], operands[1]));
    rtx ptrue = aarch64_ptrue_reg (VNx2BImode);
    rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
    emit_insn (gen_aarch64_sve_<FLOATUORS:optab>_extendvnx4sivnx2df
	       (operands[0], ptrue, temp, strictness));
    DONE;
  }
)
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Packs
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVT
;; -------------------------------------------------------------------------

;; Convert two vectors of DF to SF, or two vectors of SF to HF, and pack
;; the results into a single vector.
(define_expand "vec_pack_trunc_<Vwide>"
  [(set (match_dup 4)
	(unspec:SVE_FULL_HSF
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:<VWIDE> 1 "register_operand")]
	  UNSPEC_COND_FCVT))
   (set (match_dup 5)
	(unspec:SVE_FULL_HSF
	  [(match_dup 3)
	   (const_int SVE_RELAXED_GP)
	   (match_operand:<VWIDE> 2 "register_operand")]
	  UNSPEC_COND_FCVT))
   (set (match_operand:SVE_FULL_HSF 0 "register_operand")
	(unspec:SVE_FULL_HSF [(match_dup 4) (match_dup 5)] UNSPEC_UZP1))]
  "TARGET_SVE"
  {
    operands[3] = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    operands[4] = gen_reg_rtx (<MODE>mode);
    operands[5] = gen_reg_rtx (<MODE>mode);
  }
)
;; Predicated float-to-float truncation.
(define_insn "@aarch64_sve_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_SDF 2 "register_operand")]
	  SVE_COND_FCVT))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
     [ w        , Upl , 0 ; *              ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
  }
)
;; Predicated float-to-float truncation with merging.
(define_expand "@cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_SDF 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
)

(define_insn "*cond_<optab>_trunc<SVE_FULL_SDF:mode><SVE_FULL_HSF:mode>"
  [(set (match_operand:SVE_FULL_HSF 0 "register_operand")
	(unspec:SVE_FULL_HSF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_HSF
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_SDF 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_HSF 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ w        , Upl , w , 0  ; *              ] fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
     [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_HSF:Vetype>, %1/m, %2.<SVE_FULL_SDF:Vetype>
  }
)
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Packs (bfloat16)
;; -------------------------------------------------------------------------
;; Includes:
;; - BFCVT (BF16)
;; - BFCVTNT (BF16)
;; -------------------------------------------------------------------------

;; Predicated BFCVT.
(define_insn "@aarch64_sve_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:VNx4SF_ONLY 2 "register_operand")]
	  SVE_COND_FCVT))]
  "TARGET_SVE_BF16"
  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
     [ w        , Upl , 0 ; *              ] bfcvt\t%0.h, %1/m, %2.s
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;bfcvt\t%0.h, %1/m, %2.s
  }
)
;; Predicated BFCVT with merging.
(define_expand "@cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand")
	   (unspec:VNx8BF_ONLY
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:VNx4SF_ONLY 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE_BF16"
)

(define_insn "*cond_<optab>_trunc<VNx4SF_ONLY:mode><VNx8BF_ONLY:mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 1 "register_operand")
	   (unspec:VNx8BF_ONLY
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:VNx4SF_ONLY 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:VNx8BF_ONLY 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE_BF16"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ w        , Upl , w , 0  ; *              ] bfcvt\t%0.h, %1/m, %2.s
     [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.s, %1/z, %2.s\;bfcvt\t%0.h, %1/m, %2.s
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;bfcvt\t%0.h, %1/m, %2.s
  }
)
;; Predicated BFCVTNT.  This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;;
;; This instruction does not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand" "=w")
	(unspec:VNx8BF_ONLY
	  [(match_operand:VNx4BI 2 "register_operand" "Upl")
	   (const_int SVE_STRICT_GP)
	   (match_operand:VNx8BF_ONLY 1 "register_operand" "0")
	   (match_operand:VNx4SF 3 "register_operand" "w")]
	  UNSPEC_COND_FCVTNT))]
  "TARGET_SVE_BF16"
  "bfcvtnt\t%0.h, %2/m, %3.s"
)
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - FCVT
;; -------------------------------------------------------------------------

;; Unpack one half of a VNx4SF to VNx2DF, or one half of a VNx8HF to VNx4SF.
;; First unpack the source without conversion, then float-convert the
;; unpacked results.
(define_expand "vec_unpacks_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:SVE_FULL_HSF
     [(match_operand:SVE_FULL_HSF 1 "register_operand")]
     UNPACK_UNSIGNED)]
  "TARGET_SVE"
  {
    /* Use ZIP to do the unpack, since we don't care about the upper halves
       and since it has the nice property of not needing any subregs.
       If using UUNPK* turns out to be preferable, we could model it as
       a ZIP whose first operand is zero.  */
    rtx temp = gen_reg_rtx (<MODE>mode);
    emit_insn ((<hi_lanes_optab>
		? gen_aarch64_sve_zip2<mode>
		: gen_aarch64_sve_zip1<mode>)
	       (temp, operands[1], operands[1]));
    rtx ptrue = aarch64_ptrue_reg (<VWIDE_PRED>mode);
    rtx strictness = gen_int_mode (SVE_RELAXED_GP, SImode);
    emit_insn (gen_aarch64_sve_fcvt_nontrunc<mode><Vwide>
	       (operands[0], ptrue, temp, strictness));
    DONE;
  }
)

;; Predicated float-to-float extension.
(define_insn "@aarch64_sve_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (match_operand:SI 3 "aarch64_sve_gp_strictness")
	   (match_operand:SVE_FULL_HSF 2 "register_operand")]
	  SVE_COND_FCVT))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
     [ w        , Upl , 0 ; *              ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
  }
)

;; Predicated float-to-float extension with merging.
(define_expand "@cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (const_int SVE_STRICT_GP)
	      (match_operand:SVE_FULL_HSF 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
)

(define_insn "*cond_<optab>_nontrunc<SVE_FULL_HSF:mode><SVE_FULL_SDF:mode>"
  [(set (match_operand:SVE_FULL_SDF 0 "register_operand")
	(unspec:SVE_FULL_SDF
	  [(match_operand:<SVE_FULL_SDF:VPRED> 1 "register_operand")
	   (unspec:SVE_FULL_SDF
	     [(match_dup 1)
	      (match_operand:SI 4 "aarch64_sve_gp_strictness")
	      (match_operand:SVE_FULL_HSF 2 "register_operand")]
	     SVE_COND_FCVT)
	   (match_operand:SVE_FULL_SDF 3 "aarch64_simd_reg_or_zero")]
	  UNSPEC_SEL))]
  "TARGET_SVE && <SVE_FULL_SDF:elem_bits> > <SVE_FULL_HSF:elem_bits>"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ w        , Upl , w , 0  ; *              ] fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
     [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<SVE_FULL_SDF:Vetype>, %1/z, %2.<SVE_FULL_SDF:Vetype>\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;fcvt\t%0.<SVE_FULL_SDF:Vetype>, %1/m, %2.<SVE_FULL_HSF:Vetype>
  }
)
;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Packs
;; -------------------------------------------------------------------------
;; Includes:
;; - UZP1
;; -------------------------------------------------------------------------

;; Predicate pack.  Use UZP1 on the narrower type, which discards
;; the high part of each wide element.
(define_insn "vec_pack_trunc_<Vwide>"
  [(set (match_operand:PRED_BHS 0 "register_operand" "=Upa")
	(unspec:PRED_BHS
	  [(match_operand:<VWIDE> 1 "register_operand" "Upa")
	   (match_operand:<VWIDE> 2 "register_operand" "Upa")]
	  UNSPEC_PACK))]
  "TARGET_SVE"
  "uzp1\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [PRED<-PRED] Unpacks
;; -------------------------------------------------------------------------
;; Includes:
;; - PUNPKHI
;; - PUNPKLO
;; -------------------------------------------------------------------------

;; Unpack the low or high half of a predicate, where "high" refers to
;; the low-numbered lanes for big-endian and the high-numbered lanes
;; for little-endian.
(define_expand "vec_unpack<su>_<perm_hilo>_<mode>"
  [(match_operand:<VWIDE> 0 "register_operand")
   (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand")]
                   UNPACK)]
  "TARGET_SVE"
  {
    emit_insn ((<hi_lanes_optab>
                ? gen_aarch64_sve_punpkhi_<PRED_BHS:mode>
                : gen_aarch64_sve_punpklo_<PRED_BHS:mode>)
               (operands[0], operands[1]));
    DONE;
  }
)

(define_insn "@aarch64_sve_punpk<perm_hilo>_<mode>"
  [(set (match_operand:<VWIDE> 0 "register_operand" "=Upa")
        (unspec:<VWIDE> [(match_operand:PRED_BHS 1 "register_operand" "Upa")]
                        UNPACK_UNSIGNED))]
  "TARGET_SVE"
  "punpk<perm_hilo>\t%0.h, %1.b"
)

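;; Added example for illustration:
;;   punpklo p0.h, p1.b
;;   punpkhi p2.h, p1.b
;; split the .b predicate p1 into two predicates with twice the bits
;; per element, covering its two halves (with the meaning of "low" and
;; "high" swapped on big-endian, as noted above).
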
;; =========================================================================
;; == Vector partitioning
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [PRED] Unary partitioning
;; -------------------------------------------------------------------------
;; Includes:
;; - BRKA
;; - BRKAS
;; - BRKB
;; - BRKBS
;; -------------------------------------------------------------------------

;; Note that unlike most other instructions that have both merging and
;; zeroing forms, these instructions don't operate elementwise and so
;; don't fit the IFN_COND model.
(define_insn "@aarch64_brk<brk_op>"
  [(set (match_operand:VNx16BI 0 "register_operand")
        (unspec:VNx16BI
          [(match_operand:VNx16BI 1 "register_operand")
           (match_operand:VNx16BI 2 "register_operand")
           (match_operand:VNx16BI 3 "aarch64_simd_reg_or_zero")]
          SVE_BRK_UNARY))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1   , 2   , 3  ; attrs: pred_clobber ]
     [ &Upa     , Upa , Upa , Dz ; yes ] brk<brk_op>\t%0.b, %1/z, %2.b
     [ ?Upa     , 0Upa, 0Upa, Dz ; yes ] ^
     [ Upa      , Upa , Upa , Dz ; no  ] ^
     [ &Upa     , Upa , Upa , 0  ; yes ] brk<brk_op>\t%0.b, %1/m, %2.b
     [ ?Upa     , 0Upa, 0Upa, 0  ; yes ] ^
     [ Upa      , Upa , Upa , 0  ; no  ] ^
  }
)

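;; Added example for illustration (loosely described): the zeroing form
;;   brka    p0.b, p1/z, p2.b
;; sets p0 for the active elements of p2 up to and including the first
;; true one and clears everything else, whereas the merging form (with
;; operand 3 tied to the destination) leaves the lanes that p1 does not
;; govern at their previous value.
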
;; Same, but also producing a flags result.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand")
           (match_dup 1)
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:VNx16BI
             [(match_dup 1)
              (match_operand:VNx16BI 2 "register_operand")
              (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
             SVE_BRK_UNARY)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand")
        (unspec:VNx16BI
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          SVE_BRK_UNARY))]
  "TARGET_SVE"
  {@ [ cons: =0, 1   , 2   ; attrs: pred_clobber ]
     [ &Upa    , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b
     [ ?Upa    , 0Upa, 0Upa; yes ] ^
     [ Upa     , Upa , Upa ; no  ] ^
  }
)

;; Same, but with only the flags result being interesting.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand")
           (match_dup 1)
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:VNx16BI
             [(match_dup 1)
              (match_operand:VNx16BI 2 "register_operand")
              (match_operand:VNx16BI 3 "aarch64_simd_imm_zero")]
             SVE_BRK_UNARY)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0))]
  "TARGET_SVE"
  {@ [ cons: =0, 1   , 2   ; attrs: pred_clobber ]
     [ &Upa    , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b
     [ ?Upa    , 0Upa, 0Upa; yes ] ^
     [ Upa     , Upa , Upa ; no  ] ^
  }
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Binary partitioning
;; -------------------------------------------------------------------------
;; Includes:
;; - BRKN
;; - BRKNS
;; - BRKPA
;; - BRKPAS
;; - BRKPB
;; - BRKPBS
;; -------------------------------------------------------------------------

;; Binary BRKs (BRKN, BRKPA, BRKPB).
(define_insn "@aarch64_brk<brk_op>"
  [(set (match_operand:VNx16BI 0 "register_operand")
        (unspec:VNx16BI
          [(match_operand:VNx16BI 1 "register_operand")
           (match_operand:VNx16BI 2 "register_operand")
           (match_operand:VNx16BI 3 "register_operand")]
          SVE_BRK_BINARY))]
  "TARGET_SVE"
  {@ [ cons: =0, 1   , 2   , 3             ; attrs: pred_clobber ]
     [ &Upa    , Upa , Upa , <brk_reg_con> ; yes ] brk<brk_op>\t%0.b, %1/z, %2.b, %<brk_reg_opno>.b
     [ ?Upa    , 0Upa, 0Upa, 0<brk_reg_con>; yes ] ^
     [ Upa     , Upa , Upa , <brk_reg_con> ; no  ] ^
  }
)

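;; Added example for illustration: the BRKPA instantiation of the
;; template above emits
;;   brkpa   p0.b, p1/z, p2.b, p3.b
;; propagating a break from one loop partition into the next: whether
;; p0 receives a new partition computed from p3 depends on how the
;; partition described by p2 ended.  (Loose description; see the
;; architecture reference for the exact semantics.)
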
;; BRKN, producing both a predicate and a flags result.  Unlike other
;; flag-setting instructions, these flags are always set wrt a ptrue.
(define_insn_and_rewrite "*aarch64_brkn_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 4)
           (match_operand:VNx16BI 5)
           (const_int SVE_KNOWN_PTRUE)
           (unspec:VNx16BI
             [(match_operand:VNx16BI 1 "register_operand" "Upa")
              (match_operand:VNx16BI 2 "register_operand" "Upa")
              (match_operand:VNx16BI 3 "register_operand" "0")]
             UNSPEC_BRKN)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (unspec:VNx16BI
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          UNSPEC_BRKN))]
  "TARGET_SVE"
  "brkns\t%0.b, %1/z, %2.b, %0.b"
  "&& (operands[4] != CONST0_RTX (VNx16BImode)
       || operands[5] != CONST0_RTX (VNx16BImode))"
  {
    operands[4] = CONST0_RTX (VNx16BImode);
    operands[5] = CONST0_RTX (VNx16BImode);
  }
)

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_brkn_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 4)
           (match_operand:VNx16BI 5)
           (const_int SVE_KNOWN_PTRUE)
           (unspec:VNx16BI
             [(match_operand:VNx16BI 1 "register_operand" "Upa")
              (match_operand:VNx16BI 2 "register_operand" "Upa")
              (match_operand:VNx16BI 3 "register_operand" "0")]
             UNSPEC_BRKN)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0 "=Upa"))]
  "TARGET_SVE"
  "brkns\t%0.b, %1/z, %2.b, %0.b"
  "&& (operands[4] != CONST0_RTX (VNx16BImode)
       || operands[5] != CONST0_RTX (VNx16BImode))"
  {
    operands[4] = CONST0_RTX (VNx16BImode);
    operands[5] = CONST0_RTX (VNx16BImode);
  }
)

;; BRKPA and BRKPB, producing both a predicate and a flags result.
(define_insn "*aarch64_brk<brk_op>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand")
           (match_dup 1)
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:VNx16BI
             [(match_dup 1)
              (match_operand:VNx16BI 2 "register_operand")
              (match_operand:VNx16BI 3 "register_operand")]
             SVE_BRKP)]
          UNSPEC_PTEST))
   (set (match_operand:VNx16BI 0 "register_operand")
        (unspec:VNx16BI
          [(match_dup 1)
           (match_dup 2)
           (match_dup 3)]
          SVE_BRKP))]
  "TARGET_SVE"
  {@ [ cons: =0, 1   , 2   , 3   , 4; attrs: pred_clobber ]
     [ &Upa    , Upa , Upa , Upa ,  ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b
     [ ?Upa    , 0Upa, 0Upa, 0Upa,  ; yes ] ^
     [ Upa     , Upa , Upa , Upa ,  ; no  ] ^
  }
)

;; Same, but with only the flags result being interesting.
(define_insn "*aarch64_brk<brk_op>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand")
           (match_dup 1)
           (match_operand:SI 4 "aarch64_sve_ptrue_flag")
           (unspec:VNx16BI
             [(match_dup 1)
              (match_operand:VNx16BI 2 "register_operand")
              (match_operand:VNx16BI 3 "register_operand")]
             SVE_BRKP)]
          UNSPEC_PTEST))
   (clobber (match_scratch:VNx16BI 0))]
  "TARGET_SVE"
  {@ [ cons: =0, 1   , 2   , 3   ; attrs: pred_clobber ]
     [ &Upa    , Upa , Upa , Upa ; yes ] brk<brk_op>s\t%0.b, %1/z, %2.b, %3.b
     [ ?Upa    , 0Upa, 0Upa, 0Upa; yes ] ^
     [ Upa     , Upa , Upa , Upa ; no  ] ^
  }
)

;; -------------------------------------------------------------------------
;; ---- [PRED] Scalarization
;; -------------------------------------------------------------------------
;; Includes:
;; - PFIRST
;; - PNEXT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_pred_op><mode>"
  [(set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL
          [(match_operand:PRED_ALL 1 "register_operand" "Upa")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (match_operand:PRED_ALL 3 "register_operand" "0")]
          SVE_PITER))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE && <max_elem_bits> >= <elem_bits>"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
)

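;; Added example for illustration (loosely described):
;;   pfirst  p0.b, p1, p0.b
;; sets the first active lane of p1 in p0, and
;;   pnext   p0.d, p1, p0.d
;; replaces p0 by the next active lane of p1 after the last one that
;; was set in p0 -- the usual way of stepping through a predicate one
;; element at a time in scalarized loops.
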
;; Same, but also producing a flags result.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 2)
           (match_operand:SI 3 "aarch64_sve_ptrue_flag")
           (unspec:PRED_ALL
             [(match_operand 4)
              (match_operand:SI 5 "aarch64_sve_ptrue_flag")
              (match_operand:PRED_ALL 6 "register_operand" "0")]
             SVE_PITER)]
          UNSPEC_PTEST))
   (set (match_operand:PRED_ALL 0 "register_operand" "=Upa")
        (unspec:PRED_ALL
          [(match_dup 4)
           (match_dup 5)
           (match_dup 6)]
          SVE_PITER))]
  "TARGET_SVE
   && <max_elem_bits> >= <elem_bits>
   && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  {
    operands[4] = operands[2];
    operands[5] = operands[3];
  }
)

;; Same, but with only the flags result being interesting.
(define_insn_and_rewrite "*aarch64_sve_<sve_pred_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand 2)
           (match_operand:SI 3 "aarch64_sve_ptrue_flag")
           (unspec:PRED_ALL
             [(match_operand 4)
              (match_operand:SI 5 "aarch64_sve_ptrue_flag")
              (match_operand:PRED_ALL 6 "register_operand" "0")]
             SVE_PITER)]
          UNSPEC_PTEST))
   (clobber (match_scratch:PRED_ALL 0 "=Upa"))]
  "TARGET_SVE
   && <max_elem_bits> >= <elem_bits>
   && aarch64_sve_same_pred_for_ptest_p (&operands[2], &operands[4])"
  "<sve_pred_op>\t%0.<Vetype>, %1, %0.<Vetype>"
  "&& !rtx_equal_p (operands[2], operands[4])"
  {
    operands[4] = operands[2];
    operands[5] = operands[3];
  }
)

;; =========================================================================
;; == Counting elements
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTB
;; - CNTD
;; - CNTH
;; - CNTW
;; -------------------------------------------------------------------------

;; Count the number of elements in an svpattern.  Operand 1 is the pattern,
;; operand 2 is the number of elements that fit in a 128-bit block, and
;; operand 3 is a multiplier in the range [1, 16].
;;
;; Note that this pattern isn't used for SV_ALL (but would work for that too).
(define_insn "aarch64_sve_cnt_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unspec:SI [(match_operand:DI 1 "const_int_operand")
                      (match_operand:DI 2 "const_int_operand")
                      (match_operand:DI 3 "const_int_operand")]
                     UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("cnt", "%x0", operands + 1);
  }
)

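;; Added worked example (not from the original sources): counting
;; 64-bit elements with pattern VL4 and multiplier 2 emits
;;   cntd    x0, vl4, mul #2
;; which yields 4 * 2 = 8 when the vector holds at least four 64-bit
;; elements, and 0 otherwise (the VL4 pattern selects nothing in that
;; case).
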
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INC
;; - SQINC
;; - UQINC
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ANY_PLUS:DI (zero_extend:DI
                       (unspec:SI [(match_operand:DI 2 "const_int_operand")
                                   (match_operand:DI 3 "const_int_operand")
                                   (match_operand:DI 4 "const_int_operand")]
                                  UNSPEC_SVE_CNT_PAT))
                     (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
                                                 operands + 2);
  }
)

;; Increment an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_incsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (plus:SI (unspec:SI [(match_operand:DI 2 "const_int_operand")
                             (match_operand:DI 3 "const_int_operand")
                             (match_operand:DI 4 "const_int_operand")]
                            UNSPEC_SVE_CNT_PAT)
                 (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("inc", "%x0", operands + 2);
  }
)

;; Increment an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (<paired_extend>:DI
          (SAT_PLUS:SI
            (unspec:SI [(match_operand:DI 2 "const_int_operand")
                        (match_operand:DI 3 "const_int_operand")
                        (match_operand:DI 4 "const_int_operand")]
                       UNSPEC_SVE_CNT_PAT)
            (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    const char *registers = (<CODE> == SS_PLUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
                                                 operands + 2);
  }
)

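;; Added example for illustration: the signed instantiation can emit
;;   sqincw  x0, w0, vl8
;; the "%x0, %w0" operand pair reflecting that the 32-bit value in w0
;; is incremented with signed saturation and the result written to x0
;; sign-extended, while the unsigned form only needs "%w0".
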
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - INC
;; - SQINC
;; - UQINC
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
        (ANY_PLUS:VNx2DI
          (vec_duplicate:VNx2DI
            (zero_extend:DI
              (unspec:SI [(match_operand:DI 2 "const_int_operand")
                          (match_operand:DI 3 "const_int_operand")
                          (match_operand:DI 4 "const_int_operand")]
                         UNSPEC_SVE_CNT_PAT)))
          (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

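;; Added example for illustration: the second alternative above expands
;; to the pair
;;   movprfx z0, z1
;;   incd    z0.d, vl4
;; giving a non-destructive increment when the source and destination
;; registers differ.
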
;; Increment a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
        (ANY_PLUS:VNx4SI
          (vec_duplicate:VNx4SI
            (unspec:SI [(match_operand:DI 2 "const_int_operand")
                        (match_operand:DI 3 "const_int_operand")
                        (match_operand:DI 4 "const_int_operand")]
                       UNSPEC_SVE_CNT_PAT))
          (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Increment a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
        (ANY_PLUS:VNx8HI
          (vec_duplicate:VNx8HI
            (truncate:HI
              (unspec:SI [(match_operand:DI 2 "const_int_operand")
                          (match_operand:DI 3 "const_int_operand")
                          (match_operand:DI 4 "const_int_operand")]
                         UNSPEC_SVE_CNT_PAT)))
          (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
        (ANY_PLUS:VNx8HI
          (vec_duplicate:VNx8HI
            (match_operator:HI 5 "subreg_lowpart_operator"
              [(unspec:SI [(match_operand:DI 2 "const_int_operand")
                           (match_operand:DI 3 "const_int_operand")
                           (match_operand:DI 4 "const_int_operand")]
                          UNSPEC_SVE_CNT_PAT)]))
          (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

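;; Added note and example: there is no HImode count unspec, so the
;; patterns above compute the SImode count and match the HImode value
;; through a lowpart subreg (the subreg_lowpart_operator operand);
;; the output is still a single instruction, e.g.
;;   inch    z0.h, all, mul #2
;; (illustrative only, not from the original sources).
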
;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - DEC
;; - SQDEC
;; - UQDEC
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ANY_MINUS:DI (match_operand:DI_ONLY 1 "register_operand" "0")
                      (zero_extend:DI
                        (unspec:SI [(match_operand:DI 2 "const_int_operand")
                                    (match_operand:DI 3 "const_int_operand")
                                    (match_operand:DI 4 "const_int_operand")]
                                   UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%x0",
                                                 operands + 2);
  }
)

;; Decrement an SImode register by the number of elements in an svpattern
;; using modular arithmetic.  See aarch64_sve_cnt_pat for the counting
;; behavior.
(define_insn "*aarch64_sve_decsi_pat"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (minus:SI (match_operand:SI 1 "register_operand" "0")
                  (unspec:SI [(match_operand:DI 2 "const_int_operand")
                              (match_operand:DI 3 "const_int_operand")
                              (match_operand:DI 4 "const_int_operand")]
                             UNSPEC_SVE_CNT_PAT)))]
  "TARGET_SVE"
  {
    return aarch64_output_sve_cnt_pat_immediate ("dec", "%x0", operands + 2);
  }
)

;; Decrement an SImode register by the number of elements in an svpattern
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (<paired_extend>:DI
          (SAT_MINUS:SI
            (match_operand:SI_ONLY 1 "register_operand" "0")
            (unspec:SI [(match_operand:DI 2 "const_int_operand")
                        (match_operand:DI 3 "const_int_operand")
                        (match_operand:DI 4 "const_int_operand")]
                       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    const char *registers = (<CODE> == SS_MINUS ? "%x0, %w0" : "%w0");
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", registers,
                                                 operands + 2);
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a pattern (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - DEC
;; - SQDEC
;; - UQDEC
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w, ?&w")
        (ANY_MINUS:VNx2DI
          (match_operand:VNx2DI_ONLY 1 "register_operand" "0, w")
          (vec_duplicate:VNx2DI
            (zero_extend:DI
              (unspec:SI [(match_operand:DI 2 "const_int_operand")
                          (match_operand:DI 3 "const_int_operand")
                          (match_operand:DI 4 "const_int_operand")]
                         UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of SIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_insn "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w, ?&w")
        (ANY_MINUS:VNx4SI
          (match_operand:VNx4SI_ONLY 1 "register_operand" "0, w")
          (vec_duplicate:VNx4SI
            (unspec:SI [(match_operand:DI 2 "const_int_operand")
                        (match_operand:DI 3 "const_int_operand")
                        (match_operand:DI 4 "const_int_operand")]
                       UNSPEC_SVE_CNT_PAT))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; Decrement a vector of HIs by the number of elements in an svpattern.
;; See aarch64_sve_cnt_pat for the counting behavior.
(define_expand "@aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand")
        (ANY_MINUS:VNx8HI
          (match_operand:VNx8HI_ONLY 1 "register_operand")
          (vec_duplicate:VNx8HI
            (truncate:HI
              (unspec:SI [(match_operand:DI 2 "const_int_operand")
                          (match_operand:DI 3 "const_int_operand")
                          (match_operand:DI 4 "const_int_operand")]
                         UNSPEC_SVE_CNT_PAT)))))]
  "TARGET_SVE"
)

(define_insn "*aarch64_sve_<inc_dec><mode>_pat"
  [(set (match_operand:VNx8HI 0 "register_operand" "=w, ?&w")
        (ANY_MINUS:VNx8HI
          (match_operand:VNx8HI_ONLY 1 "register_operand" "0, w")
          (vec_duplicate:VNx8HI
            (match_operator:HI 5 "subreg_lowpart_operator"
              [(unspec:SI [(match_operand:DI 2 "const_int_operand")
                           (match_operand:DI 3 "const_int_operand")
                           (match_operand:DI 4 "const_int_operand")]
                          UNSPEC_SVE_CNT_PAT)]))))]
  "TARGET_SVE"
  {
    if (which_alternative == 1)
      output_asm_insn ("movprfx\t%0, %1", operands);
    return aarch64_output_sve_cnt_pat_immediate ("<inc_dec>", "%0.<Vetype>",
                                                 operands + 2);
  }
  [(set_attr "movprfx" "*,yes")]
)

;; -------------------------------------------------------------------------
;; ---- [INT] Count elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTP
;; -------------------------------------------------------------------------

;; Count the number of set bits in a predicate.  Operand 2 is true if
;; operand 1 is known to be all-true.
(define_insn "@aarch64_pred_cntp<mode>"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (zero_extend:DI
          (unspec:SI [(match_operand:PRED_ALL 1 "register_operand" "Upl")
                      (match_operand:SI 2 "aarch64_sve_ptrue_flag")
                      (match_operand:PRED_ALL 3 "register_operand" "Upa")]
                     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "cntp\t%x0, %1, %3.<Vetype>")

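;; Added example for illustration:
;;   cntp    x0, p0, p1.b
;; counts the .b lanes that are set in p1 and active in p0; when
;; operand 2 is SVE_KNOWN_PTRUE the governing predicate is known to
;; select every lane, so this is simply the population count of p1.
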
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
        (ANY_PLUS:DI
          (zero_extend:DI
            (unspec:SI [(match_dup 3)
                        (const_int SVE_KNOWN_PTRUE)
                        (match_operand:PRED_ALL 2 "register_operand")]
                       UNSPEC_CNTP))
          (match_operand:DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ANY_PLUS:DI
          (zero_extend:DI
            (unspec:SI [(match_operand 3)
                        (const_int SVE_KNOWN_PTRUE)
                        (match_operand:PRED_ALL 2 "register_operand" "Upa")]
                       UNSPEC_CNTP))
          (match_operand:DI_ONLY 1 "register_operand" "0")))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Increment an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of
;; the operands.
(define_insn_and_rewrite "*aarch64_incsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (plus:SI
          (unspec:SI [(match_operand 3)
                      (const_int SVE_KNOWN_PTRUE)
                      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
                     UNSPEC_CNTP)
          (match_operand:SI 1 "register_operand" "0")))]
  "TARGET_SVE"
  "incp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Increment an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
        (<paired_extend>:DI
          (SAT_PLUS:SI
            (unspec:SI [(match_dup 3)
                        (const_int SVE_KNOWN_PTRUE)
                        (match_operand:PRED_ALL 2 "register_operand")]
                       UNSPEC_CNTP)
            (match_operand:SI_ONLY 1 "register_operand"))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (<paired_extend>:DI
          (SAT_PLUS:SI
            (unspec:SI [(match_operand 3)
                        (const_int SVE_KNOWN_PTRUE)
                        (match_operand:PRED_ALL 2 "register_operand" "Upa")]
                       UNSPEC_CNTP)
            (match_operand:SI_ONLY 1 "register_operand" "0"))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_PLUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

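;; Added example for illustration:
;;   sqincp  x0, p0.b, w0
;; increments the 32-bit value in w0 by the number of active .b lanes
;; of p0 with signed saturation and sign-extends the result into x0;
;; the unsigned form is written
;;   uqincp  w0, p0.b
;; and relies on the implicit zero-extension of 32-bit writes.
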
;; -------------------------------------------------------------------------
;; ---- [INT] Increment by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - INCP
;; - SQINCP
;; - UQINCP
;; -------------------------------------------------------------------------

;; Increment a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
        (ANY_PLUS:VNx2DI
          (vec_duplicate:VNx2DI
            (zero_extend:DI
              (unspec:SI
                [(match_dup 3)
                 (const_int SVE_KNOWN_PTRUE)
                 (match_operand:<VPRED> 2 "register_operand")]
                UNSPEC_CNTP)))
          (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
        (ANY_PLUS:VNx2DI
          (vec_duplicate:VNx2DI
            (zero_extend:DI
              (unspec:SI
                [(match_operand 3)
                 (const_int SVE_KNOWN_PTRUE)
                 (match_operand:<VPRED> 2 "register_operand")]
                UNSPEC_CNTP)))
          (match_operand:VNx2DI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
     [ w        , 0 , Upa ; *   ] <inc_dec>p\t%0.d, %2
     [ ?&w      , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Increment a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
        (ANY_PLUS:VNx4SI
          (vec_duplicate:VNx4SI
            (unspec:SI
              [(match_dup 3)
               (const_int SVE_KNOWN_PTRUE)
               (match_operand:<VPRED> 2 "register_operand")]
              UNSPEC_CNTP))
          (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
        (ANY_PLUS:VNx4SI
          (vec_duplicate:VNx4SI
            (unspec:SI
              [(match_operand 3)
               (const_int SVE_KNOWN_PTRUE)
               (match_operand:<VPRED> 2 "register_operand")]
              UNSPEC_CNTP))
          (match_operand:VNx4SI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
     [ w        , 0 , Upa ; *   ] <inc_dec>p\t%0.s, %2
     [ ?&w      , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Increment a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
        (ANY_PLUS:VNx8HI
          (vec_duplicate:VNx8HI
            (truncate:HI
              (unspec:SI
                [(match_dup 3)
                 (const_int SVE_KNOWN_PTRUE)
                 (match_operand:<VPRED> 2 "register_operand")]
                UNSPEC_CNTP)))
          (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
        (ANY_PLUS:VNx8HI
          (vec_duplicate:VNx8HI
            (match_operator:HI 3 "subreg_lowpart_operator"
              [(unspec:SI
                 [(match_operand 4)
                  (const_int SVE_KNOWN_PTRUE)
                  (match_operand:<VPRED> 2 "register_operand")]
                 UNSPEC_CNTP)]))
          (match_operand:VNx8HI_ONLY 1 "register_operand")))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
     [ w        , 0 , Upa ; *   ] <inc_dec>p\t%0.h, %2
     [ ?&w      , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (scalar)
;; -------------------------------------------------------------------------
;; Includes:
;; - DECP
;; - SQDECP
;; - UQDECP
;; -------------------------------------------------------------------------

;; Decrement a DImode register by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
        (ANY_MINUS:DI
          (match_operand:DI_ONLY 1 "register_operand")
          (zero_extend:DI
            (unspec:SI [(match_dup 3)
                        (const_int SVE_KNOWN_PTRUE)
                        (match_operand:PRED_ALL 2 "register_operand")]
                       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><DI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (ANY_MINUS:DI
          (match_operand:DI_ONLY 1 "register_operand" "0")
          (zero_extend:DI
            (unspec:SI [(match_operand 3)
                        (const_int SVE_KNOWN_PTRUE)
                        (match_operand:PRED_ALL 2 "register_operand" "Upa")]
                       UNSPEC_CNTP))))]
  "TARGET_SVE"
  "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; Decrement an SImode register by the number of set bits in a predicate
;; using modular arithmetic.  See aarch64_sve_cntp for a description of the
;; operands.
(define_insn_and_rewrite "*aarch64_decsi<mode>_cntp"
  [(set (match_operand:SI 0 "register_operand" "=r")
        (minus:SI
          (match_operand:SI 1 "register_operand" "0")
          (unspec:SI [(match_operand 3)
                      (const_int SVE_KNOWN_PTRUE)
                      (match_operand:PRED_ALL 2 "register_operand" "Upa")]
                     UNSPEC_CNTP)))]
  "TARGET_SVE"
  "decp\t%x0, %2.<Vetype>"
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<MODE>mode);
  }
)

;; Decrement an SImode register by the number of set bits in a predicate
;; using saturating arithmetic, extending the result to 64 bits.
;;
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand")
        (<paired_extend>:DI
          (SAT_MINUS:SI
            (match_operand:SI_ONLY 1 "register_operand")
            (unspec:SI [(match_dup 3)
                        (const_int SVE_KNOWN_PTRUE)
                        (match_operand:PRED_ALL 2 "register_operand")]
                       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><SI_ONLY:mode><PRED_ALL:mode>_cntp"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (<paired_extend>:DI
          (SAT_MINUS:SI
            (match_operand:SI_ONLY 1 "register_operand" "0")
            (unspec:SI [(match_operand 3)
                        (const_int SVE_KNOWN_PTRUE)
                        (match_operand:PRED_ALL 2 "register_operand" "Upa")]
                       UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    if (<CODE> == SS_MINUS)
      return "<inc_dec>p\t%x0, %2.<PRED_ALL:Vetype>, %w0";
    else
      return "<inc_dec>p\t%w0, %2.<PRED_ALL:Vetype>";
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<PRED_ALL:MODE>mode);
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Decrement by the number of elements in a predicate (vector)
;; -------------------------------------------------------------------------
;; Includes:
;; - DECP
;; - SQDECP
;; - UQDECP
;; -------------------------------------------------------------------------

;; Decrement a vector of DIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
        (ANY_MINUS:VNx2DI
          (match_operand:VNx2DI_ONLY 1 "register_operand")
          (vec_duplicate:VNx2DI
            (zero_extend:DI
              (unspec:SI
                [(match_dup 3)
                 (const_int SVE_KNOWN_PTRUE)
                 (match_operand:<VPRED> 2 "register_operand")]
                UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx2DI 0 "register_operand")
        (ANY_MINUS:VNx2DI
          (match_operand:VNx2DI_ONLY 1 "register_operand")
          (vec_duplicate:VNx2DI
            (zero_extend:DI
              (unspec:SI
                [(match_operand 3)
                 (const_int SVE_KNOWN_PTRUE)
                 (match_operand:<VPRED> 2 "register_operand")]
                UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
     [ w        , 0 , Upa ; *   ] <inc_dec>p\t%0.d, %2
     [ ?&w      , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.d, %2
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Decrement a vector of SIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
        (ANY_MINUS:VNx4SI
          (match_operand:VNx4SI_ONLY 1 "register_operand")
          (vec_duplicate:VNx4SI
            (unspec:SI
              [(match_dup 3)
               (const_int SVE_KNOWN_PTRUE)
               (match_operand:<VPRED> 2 "register_operand")]
              UNSPEC_CNTP))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx4SI 0 "register_operand")
        (ANY_MINUS:VNx4SI
          (match_operand:VNx4SI_ONLY 1 "register_operand")
          (vec_duplicate:VNx4SI
            (unspec:SI
              [(match_operand 3)
               (const_int SVE_KNOWN_PTRUE)
               (match_operand:<VPRED> 2 "register_operand")]
              UNSPEC_CNTP))))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
     [ w        , 0 , Upa ; *   ] <inc_dec>p\t%0.s, %2
     [ ?&w      , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.s, %2
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Decrement a vector of HIs by the number of set bits in a predicate.
;; See aarch64_sve_cntp for a description of the operands.
(define_expand "@aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
        (ANY_MINUS:VNx8HI
          (match_operand:VNx8HI_ONLY 1 "register_operand")
          (vec_duplicate:VNx8HI
            (truncate:HI
              (unspec:SI
                [(match_dup 3)
                 (const_int SVE_KNOWN_PTRUE)
                 (match_operand:<VPRED> 2 "register_operand")]
                UNSPEC_CNTP)))))]
  "TARGET_SVE"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve_<inc_dec><mode>_cntp"
  [(set (match_operand:VNx8HI 0 "register_operand")
        (ANY_MINUS:VNx8HI
          (match_operand:VNx8HI_ONLY 1 "register_operand")
          (vec_duplicate:VNx8HI
            (match_operator:HI 3 "subreg_lowpart_operator"
              [(unspec:SI
                 [(match_operand 4)
                  (const_int SVE_KNOWN_PTRUE)
                  (match_operand:<VPRED> 2 "register_operand")]
                 UNSPEC_CNTP)]))))]
  "TARGET_SVE"
  {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
     [ w        , 0 , Upa ; *   ] <inc_dec>p\t%0.h, %2
     [ ?&w      , w , Upa ; yes ] movprfx\t%0, %1\;<inc_dec>p\t%0.h, %2
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_split "@aarch64_sve_get_neonq_<mode>"
  [(set (match_operand:<V128> 0 "register_operand" "=w")
        (vec_select:<V128>
          (match_operand:SVE_FULL 1 "register_operand" "w")
          (match_operand 2 "descending_int_parallel")))]
  "TARGET_SVE
   && BYTES_BIG_ENDIAN
   && known_eq (INTVAL (XVECEXP (operands[2], 0, 0)),
                GET_MODE_NUNITS (<V128>mode) - 1)"
  "#"
  "&& reload_completed"
  [(set (match_dup 0) (match_dup 1))]
  {
    operands[1] = gen_rtx_REG (<V128>mode, REGNO (operands[1]));
  }
)

(define_insn "@aarch64_sve_set_neonq_<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:SVE_FULL 1 "register_operand" "w")
           (match_operand:<V128> 2 "register_operand" "w")
           (match_operand:<VPRED> 3 "register_operand" "Upl")]
          UNSPEC_SET_NEONQ))]
  "TARGET_SVE
   && BYTES_BIG_ENDIAN"
  {
    operands[2] = lowpart_subreg (<MODE>mode, operands[2],
                                  GET_MODE (operands[2]));
    return "sel\t%0.<Vetype>, %3, %2.<Vetype>, %1.<Vetype>";
  }
)
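
;; Added usage note (an illustration, not a guarantee enforced by the
;; patterns themselves): on big-endian targets the SEL above merges the
;; Neon value into the SVE register, e.g.
;;   sel     z0.s, p3, z2.s, z1.s
;; where callers are expected to pass a predicate (operand 3) that
;; selects exactly the lanes occupied by the 128-bit Neon value.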