;; Machine description for AArch64 SVE2.
;; Copyright (C) 2019-2025 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.
;; The file is organised into the following sections (search for the full
;; line):
;;
;; == Moves
;; ---- Predicate to vector moves
;; ---- Vector to predicate moves
;;
;; == Loads
;; ---- 128-bit extending loads
;; ---- 128-bit structure loads
;; ---- Multi-register loads predicated by a counter
;; ---- 128-bit gather loads
;; ---- Non-temporal gather loads
;;
;; == Stores
;; ---- 128-bit truncating stores
;; ---- 128-bit structure stores
;; ---- Multi-register stores predicated by a counter
;; ---- 128-bit scatter stores
;; ---- Non-temporal scatter stores
;;
;; == Predicate manipulation
;; ---- [PRED] Predicate-as-counter PTRUE
;; ---- [PRED] Predicate extraction
;; ---- [PRED] Predicate selection
;; ---- [PRED] Predicate count
;;
;; == Uniform unary arithmetic
;; ---- [FP] Multi-register unary operations
;;
;; == Uniform binary arithmetic
;; ---- [INT] Multi-register operations
;; ---- [INT] Clamp to minimum/maximum
;; ---- [INT] Multiplication
;; ---- [INT] Scaled high-part multiplication
;; ---- [INT] General binary arithmetic that maps to unspecs
;; ---- [INT] Saturating binary arithmetic
;; ---- [INT] Saturating left shifts
;; ---- [FP] Non-widening bfloat16 arithmetic
;; ---- [FP] Clamp to minimum/maximum
;;
;; == Uniform ternary arithmetic
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; ---- [INT] Multiply-and-accumulate operations
;; ---- [INT] Binary logic operations with rotation
;; ---- [INT] Ternary logic operations
;; ---- [INT] Shift-and-accumulate operations
;; ---- [INT] Shift-and-insert operations
;; ---- [INT] Sum of absolute differences
;; ---- [FP] Mfloat8 Multiply-and-accumulate operations
;; ---- [FP] Mfloat8 dot products
;;
;; == Extending arithmetic
;; ---- [INT] Multi-register widening conversions
;; ---- [INT] Wide binary arithmetic
;; ---- [INT] Long binary arithmetic
;; ---- [INT] Long left shifts
;; ---- [INT] Long binary arithmetic with accumulation
;; ---- [FP] Multi-register operations
;; ---- [FP] Long multiplication with accumulation
;;
;; == Narrowing arithmetic
;; ---- [INT] Narrowing unary arithmetic
;; ---- [INT] Multi-vector narrowing unary arithmetic
;; ---- [INT] Narrowing binary arithmetic
;; ---- [INT] Narrowing right shifts
;; ---- [INT] Multi-vector narrowing right shifts
;;
;; == Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic
;; ---- [FP] Pairwise arithmetic
;; ---- [INT] Pairwise arithmetic with accumulation
;;
;; == Complex arithmetic
;; ---- [INT] Complex binary operations
;; ---- [INT] Complex ternary operations
;; ---- [INT] Complex dot product
;;
;; == Conversions
;; ---- [FP<-FP] Widening conversions
;; ---- [FP<-FP] Narrowing conversions
;; ---- [FP<-FP] Multi-vector widening conversions
;; ---- [FP<-FP] Multi-vector narrowing conversions
;; ---- [FP<-INT] Multi-vector conversions
;; ---- [INT<-FP] Multi-vector conversions
;;
;; == Other arithmetic
;; ---- [INT] Reciprocal approximation
;; ---- [INT<-FP] Base-2 logarithm
;; ---- [INT] Polynomial multiplication
;;
;; == Comparisons and selects
;; ---- [INT,FP] Select based on predicates as counters
;; ---- [INT] While tests
;;
;; == Reductions
;; ---- [INT] Reduction to 128-bit vector
;; ---- [FP] Reduction to 128-bit vector
;;
;; == Permutation
;; ---- [INT,FP] Reversal
;; ---- [INT,FP] HVLA permutes
;; ---- [INT,FP] General permutes
;; ---- [INT,FP] Multi-register permutes
;; ---- [INT] Optional bit-permute extensions
;;
;; == General
;; ---- Check for aliases between pointers
;; ---- Histogram processing
;; ---- String matching
;;
;; == Cryptographic extensions
;; ---- Optional AES extensions
;; ---- Optional SHA-3 extensions
;; ---- Optional SM4 extensions
;; =========================================================================
;; == Moves
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Predicate to vector moves
;; -------------------------------------------------------------------------
;; Includes:
;; - PMOV (to vector) (SVE2p1)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_pmov_to_<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand" "Upa")]
          UNSPEC_PMOV_UNPACK))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "pmov\t%0, %1.<Vetype>"
)

(define_insn "@aarch64_pmov_lane_to_<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0")
           (match_operand:<VPRED> 2 "register_operand" "Upa")
           (match_operand:DI 3 "immediate_operand")]
          UNSPEC_PMOV_UNPACK_LANE))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "pmov\t%0[%3], %2.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- Vector to predicate moves
;; -------------------------------------------------------------------------
;; Includes:
;; - PMOV (from vector) (SVE2p1)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_pmov_from_<mode>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (unspec:VNx16BI
          [(match_operand:SVE_FULL_I 1 "register_operand" "w")]
          UNSPEC_PMOV_PACK))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "pmov\t%0.<Vetype>, %1"
)

(define_insn "@aarch64_pmov_lane_from_<mode>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (unspec:VNx16BI
          [(match_operand:SVE_FULL_I 1 "register_operand" "w")
           (match_operand:DI 2 "immediate_operand")]
          UNSPEC_PMOV_PACK_LANE))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "pmov\t%0.<Vetype>, %1[%2]"
)
;; =========================================================================
;; == Loads
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- 128-bit extending loads
;; -------------------------------------------------------------------------
;; Includes:
;; - LD1W (to .Q) (SVE2p1)
;; - LD1D (to .Q) (SVE2p1)
;; -------------------------------------------------------------------------
;; There isn't really a natural way of representing these instructions
;; with the modes that we normally use:
;;
;; (1) It doesn't really make sense to use VNx1TI (or similar) for the
;;     result, since there's nothing that can be done with such a mode
;;     other than to cast it to another mode.  It also isn't how the
;;     ACLE represents it (for similar reasons).
;;
;; (2) Only the lowest bit of each 16 in the predicate is significant,
;;     but it doesn't really make sense to use VNx1BI to represent it,
;;     since there is no "PTRUE Pn.Q, ..." instruction.
;;
;; (3) We do however need to use VNx1DI and VNx1SI to represent the
;;     source memories, since none of the normal register modes would
;;     give the right extent and alignment information (with the alignment
;;     mattering only for -mstrict-align).
(define_insn "@aarch64_sve_ld1_extendq<mode>"
  [(set (match_operand:SVE_FULL_SD 0 "register_operand" "=w")
        (unspec:SVE_FULL_SD
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:<LD1_EXTENDQ_MEM> 1 "memory_operand" "m")]
          UNSPEC_LD1_EXTENDQ))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "ld1<Vesize>\t{%0.q}, %2/z, %1"
)
;; -------------------------------------------------------------------------
;; ---- 128-bit structure loads
;; -------------------------------------------------------------------------
;; Includes:
;; - LD2Q (SVE2p1)
;; - LD3Q (SVE2p1)
;; - LD4Q (SVE2p1)
;; -------------------------------------------------------------------------
;; Predicated LD[234]Q.
(define_insn "@aarch64_sve_ldnq<mode>"
  [(set (match_operand:SVE_STRUCT 0 "register_operand" "=w")
        (unspec:SVE_STRUCT
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:<VNxTI> 1 "memory_operand" "m")]
          UNSPEC_LDNQ))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "ld<vector_count>q\t{%S0.q - %<Vendreg>0.q}, %2/z, %1"
)
;; -------------------------------------------------------------------------
;; ---- Multi-register loads predicated by a counter
;; -------------------------------------------------------------------------
;; Includes:
;; - LD1B (SVE2p1, SME2)
;; - LD1D (SVE2p1, SME2)
;; - LD1H (SVE2p1, SME2)
;; - LD1W (SVE2p1, SME2)
;; - LDNT1B (SVE2p1, SME2)
;; - LDNT1D (SVE2p1, SME2)
;; - LDNT1H (SVE2p1, SME2)
;; - LDNT1W (SVE2p1, SME2)
;; -------------------------------------------------------------------------
;; Predicated LD1 (multi), with a count as predicate.
(define_insn "@aarch64_<optab><mode>"
  [(set (match_operand:SVE_FULLx24 0 "aligned_register_operand" "=Uw<vector_count>")
        (unspec:SVE_FULLx24
          [(match_operand:VNx16BI 2 "register_operand" "Uph")
           (match_operand:SVE_FULLx24 1 "memory_operand" "m")
           (match_operand:SVE_FULLx24 3 "aarch64_maskload_else_operand")]
          LD1_COUNT))]
  "TARGET_SVE2p1_OR_SME2"
  "<optab><Vesize>\t%0, %K2/z, %1"
  [(set_attr "stride_type" "ld1_consecutive")]
)
(define_insn "@aarch64_<optab><mode>_strided2"
  [(set (match_operand:<VSINGLE> 0 "aarch64_simd_register" "=Uwd")
        (unspec:<VSINGLE>
          [(match_operand:VNx16BI 3 "register_operand" "Uph")
           (match_operand:SVE_FULLx2 2 "memory_operand" "m")
           (const_int 0)]
          LD1_COUNT))
   (set (match_operand:<VSINGLE> 1 "aarch64_simd_register" "=w")
        (unspec:<VSINGLE>
          [(match_dup 3)
           (match_dup 2)
           (const_int 1)]
          LD1_COUNT))]
  "TARGET_STREAMING_SME2
   && aarch64_strided_registers_p (operands, 2, 8)"
  "<optab><Vesize>\t{%0.<Vetype>, %1.<Vetype>}, %K3/z, %2"
  [(set_attr "stride_type" "ld1_strided")]
)
(define_insn "@aarch64_<optab><mode>_strided4"
  [(set (match_operand:<VSINGLE> 0 "aarch64_simd_register" "=Uwt")
        (unspec:<VSINGLE>
          [(match_operand:VNx16BI 5 "register_operand" "Uph")
           (match_operand:SVE_FULLx4 4 "memory_operand" "m")
           (const_int 0)]
          LD1_COUNT))
   (set (match_operand:<VSINGLE> 1 "aarch64_simd_register" "=w")
        (unspec:<VSINGLE>
          [(match_dup 5)
           (match_dup 4)
           (const_int 1)]
          LD1_COUNT))
   (set (match_operand:<VSINGLE> 2 "aarch64_simd_register" "=w")
        (unspec:<VSINGLE>
          [(match_dup 5)
           (match_dup 4)
           (const_int 2)]
          LD1_COUNT))
   (set (match_operand:<VSINGLE> 3 "aarch64_simd_register" "=w")
        (unspec:<VSINGLE>
          [(match_dup 5)
           (match_dup 4)
           (const_int 3)]
          LD1_COUNT))]
  "TARGET_STREAMING_SME2
   && aarch64_strided_registers_p (operands, 4, 4)"
  "<optab><Vesize>\t{%0.<Vetype>, %1.<Vetype>, %2.<Vetype>, %3.<Vetype>}, %K5/z, %4"
  [(set_attr "stride_type" "ld1_strided")]
)
;; -------------------------------------------------------------------------
;; ---- 128-bit gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LD1Q (SVE2p1)
;; -------------------------------------------------------------------------
;; Model this as operating on the largest valid element size, which is DI.
;; This avoids having to define move patterns & more for VNx1TI, which would
;; be difficult without a non-gather form of LD1Q.
(define_insn "aarch64_gather_ld1q"
  [(set (match_operand:VNx2DI 0 "register_operand")
        (unspec:VNx2DI
          [(match_operand:VNx2BI 1 "register_operand")
           (match_operand:DI 2 "aarch64_reg_or_zero")
           (match_operand:VNx2DI 3 "register_operand")
           (mem:BLK (scratch))]
          UNSPEC_LD1Q_GATHER))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  {@ [cons: =0, 1, 2, 3]
     [&w, Upl, Z, w] ld1q\t{%0.q}, %1/z, [%3.d]
     [?w, Upl, Z, 0] ^
     [&w, Upl, r, w] ld1q\t{%0.q}, %1/z, [%3.d, %2]
     [?w, Upl, r, 0] ^
  }
)
;; -------------------------------------------------------------------------
;; ---- Non-temporal gather loads
;; -------------------------------------------------------------------------
;; Includes gather forms of:
;; - LDNT1B
;; - LDNT1D
;; - LDNT1H
;; - LDNT1W
;; -------------------------------------------------------------------------
;; Non-extending loads.
(define_insn "@aarch64_gather_ldnt<mode>"
  [(set (match_operand:SVE_FULL_SD 0 "register_operand")
        (unspec:SVE_FULL_SD
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:DI 2 "aarch64_reg_or_zero")
           (match_operand:<V_INT_EQUIV> 3 "register_operand")
           (mem:BLK (scratch))]
          UNSPEC_LDNT1_GATHER))]
  "TARGET_SVE2 && TARGET_NON_STREAMING"
  {@ [cons: =0, 1, 2, 3]
     [&w, Upl, Z, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>]
     [?w, Upl, Z, 0 ] ^
     [&w, Upl, r, w ] ldnt1<Vesize>\t%0.<Vetype>, %1/z, [%3.<Vetype>, %2]
     [?w, Upl, r, 0 ] ^
  }
)
;; Extending loads.
(define_insn_and_rewrite "@aarch64_gather_ldnt_<ANY_EXTEND:optab><SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand")
        (unspec:SVE_FULL_SDI
          [(match_operand:<SVE_FULL_SDI:VPRED> 4 "general_operand")
           (ANY_EXTEND:SVE_FULL_SDI
             (unspec:SVE_PARTIAL_I
               [(match_operand:<SVE_FULL_SDI:VPRED> 1 "register_operand")
                (match_operand:DI 2 "aarch64_reg_or_zero")
                (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 3 "register_operand")
                (mem:BLK (scratch))]
               UNSPEC_LDNT1_GATHER))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2
   && TARGET_NON_STREAMING
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  {@ [cons: =0, 1, 2, 3, 4]
     [&w, Upl, Z, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>]
     [?w, Upl, Z, 0, UplDnm] ^
     [&w, Upl, r, w, UplDnm] ldnt1<ANY_EXTEND:s><SVE_PARTIAL_I:Vesize>\t%0.<SVE_FULL_SDI:Vetype>, %1/z, [%3.<SVE_FULL_SDI:Vetype>, %2]
     [?w, Upl, r, 0, UplDnm] ^
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<SVE_FULL_SDI:VPRED>mode);
  }
)
;; =========================================================================
;; == Stores
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- 128-bit truncating stores
;; -------------------------------------------------------------------------
;; Includes:
;; - ST1W (from .Q) (SVE2p1)
;; - ST1D (from .Q) (SVE2p1)
;; -------------------------------------------------------------------------
;; See the comment above the corresponding loads for a discussion about the
;; choice of modes.
(define_insn "@aarch64_sve_st1_truncq<mode>"
  [(set (match_operand:<LD1_EXTENDQ_MEM> 0 "memory_operand" "+m")
        (unspec:<LD1_EXTENDQ_MEM>
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_FULL_SD 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_ST1_TRUNCQ))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "st1<Vesize>\t{%1.q}, %2, %0"
)
;; -------------------------------------------------------------------------
;; ---- 128-bit structure stores
;; -------------------------------------------------------------------------
;; Includes:
;; - ST2Q (SVE2p1)
;; - ST3Q (SVE2p1)
;; - ST4Q (SVE2p1)
;; -------------------------------------------------------------------------
;; Predicated ST[234]Q.
(define_insn "@aarch64_sve_stnq<mode>"
  [(set (match_operand:<VNxTI> 0 "memory_operand" "+m")
        (unspec:<VNxTI>
          [(match_operand:<VPRED> 2 "register_operand" "Upl")
           (match_operand:SVE_STRUCT 1 "register_operand" "w")
           (match_dup 0)]
          UNSPEC_STNQ))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "st<vector_count>q\t{%S1.q - %<Vendreg>1.q}, %2, %0"
)
;; -------------------------------------------------------------------------
;; ---- Multi-register stores predicated by a counter
;; -------------------------------------------------------------------------
;; Includes:
;; - ST1B (SVE2p1, SME2)
;; - ST1D (SVE2p1, SME2)
;; - ST1H (SVE2p1, SME2)
;; - ST1W (SVE2p1, SME2)
;; - STNT1B (SVE2p1, SME2)
;; - STNT1D (SVE2p1, SME2)
;; - STNT1H (SVE2p1, SME2)
;; - STNT1W (SVE2p1, SME2)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_<optab><mode>"
  [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
        (unspec:SVE_FULLx24
          [(match_operand:VNx16BI 2 "register_operand" "Uph")
           (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
           (match_dup 0)]
          ST1_COUNT))]
  "TARGET_SVE2p1_OR_SME2"
  "<optab><Vesize>\t%1, %K2, %0"
  [(set_attr "stride_type" "st1_consecutive")]
)
(define_insn "@aarch64_<optab><mode>_strided2"
  [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
        (unspec:SVE_FULLx24
          [(match_operand:VNx16BI 1 "register_operand" "Uph")
           (match_operand:<VSINGLE> 2 "aarch64_simd_register" "Uwd")
           (match_operand:<VSINGLE> 3 "aarch64_simd_register" "w")
           (match_dup 0)]
          ST1_COUNT))]
  "TARGET_STREAMING_SME2
   && aarch64_strided_registers_p (operands + 2, 2, 8)"
  "<optab><Vesize>\t{%2.<Vetype>, %3.<Vetype>}, %K1, %0"
  [(set_attr "stride_type" "st1_strided")]
)
(define_insn "@aarch64_<optab><mode>_strided4"
  [(set (match_operand:SVE_FULLx24 0 "memory_operand" "+m")
        (unspec:SVE_FULLx24
          [(match_operand:VNx16BI 1 "register_operand" "Uph")
           (match_operand:<VSINGLE> 2 "aarch64_simd_register" "Uwt")
           (match_operand:<VSINGLE> 3 "aarch64_simd_register" "w")
           (match_operand:<VSINGLE> 4 "aarch64_simd_register" "w")
           (match_operand:<VSINGLE> 5 "aarch64_simd_register" "w")
           (match_dup 0)]
          ST1_COUNT))]
  "TARGET_STREAMING_SME2
   && aarch64_strided_registers_p (operands + 2, 4, 4)"
  "<optab><Vesize>\t{%2.<Vetype>, %3.<Vetype>, %4.<Vetype>, %5.<Vetype>}, %K1, %0"
  [(set_attr "stride_type" "st1_strided")]
)
;; -------------------------------------------------------------------------
;; ---- 128-bit scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter form of:
;; - ST1Q (SVE2p1)
;; -------------------------------------------------------------------------
(define_insn "aarch64_scatter_st1q"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:VNx2BI 0 "register_operand")
           (match_operand:DI 1 "aarch64_reg_or_zero")
           (match_operand:VNx2DI 2 "register_operand")
           (match_operand:VNx2DI 3 "register_operand")]
          UNSPEC_ST1Q_SCATTER))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  {@ [ cons: 0 , 1 , 2 , 3 ]
     [ Upl     , Z , w , w ] st1q\t{%3.q}, %0, [%2.d]
     [ Upl     , r , w , w ] st1q\t{%3.q}, %0, [%2.d, %1]
  }
)
;; -------------------------------------------------------------------------
;; ---- Non-temporal scatter stores
;; -------------------------------------------------------------------------
;; Includes scatter forms of:
;; - STNT1B
;; - STNT1D
;; - STNT1H
;; - STNT1W
;; -------------------------------------------------------------------------
;; Non-truncating stores.
(define_insn "@aarch64_scatter_stnt<mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<VPRED> 0 "register_operand")
           (match_operand:DI 1 "aarch64_reg_or_zero")
           (match_operand:<V_INT_EQUIV> 2 "register_operand")
           (match_operand:SVE_FULL_SD 3 "register_operand")]
          UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2 && TARGET_NON_STREAMING"
  {@ [ cons: 0 , 1 , 2 , 3 ]
     [ Upl     , Z , w , w ] stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>]
     [ Upl     , r , w , w ] stnt1<Vesize>\t%3.<Vetype>, %0, [%2.<Vetype>, %1]
  }
)
;; Truncating stores.
(define_insn "@aarch64_scatter_stnt_<SVE_FULL_SDI:mode><SVE_PARTIAL_I:mode>"
  [(set (mem:BLK (scratch))
        (unspec:BLK
          [(match_operand:<SVE_FULL_SDI:VPRED> 0 "register_operand")
           (match_operand:DI 1 "aarch64_reg_or_zero")
           (match_operand:<SVE_FULL_SDI:V_INT_EQUIV> 2 "register_operand")
           (truncate:SVE_PARTIAL_I
             (match_operand:SVE_FULL_SDI 3 "register_operand"))]
          UNSPEC_STNT1_SCATTER))]
  "TARGET_SVE2
   && TARGET_NON_STREAMING
   && (~<SVE_FULL_SDI:narrower_mask> & <SVE_PARTIAL_I:self_mask>) == 0"
  {@ [ cons: 0 , 1 , 2 , 3 ]
     [ Upl     , Z , w , w ] stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>]
     [ Upl     , r , w , w ] stnt1<SVE_PARTIAL_I:Vesize>\t%3.<SVE_FULL_SDI:Vetype>, %0, [%2.<SVE_FULL_SDI:Vetype>, %1]
  }
)
;; =========================================================================
;; == Predicate manipulation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [PRED] Predicate-as-counter PTRUE
;; -------------------------------------------------------------------------
;; - PTRUE (predicate-as-counter form)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_ptrue_c<BHSD_BITS>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
        (unspec:VNx16BI [(const_int BHSD_BITS)] UNSPEC_PTRUE_C))]
  "TARGET_SVE2p1_OR_SME2"
  "ptrue\t%K0.<bits_etype>"
)
;; -------------------------------------------------------------------------
;; ---- [PRED] Predicate extraction
;; -------------------------------------------------------------------------
;; Includes:
;; - PEXT
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_pext<BHSD_BITS>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (unspec:VNx16BI
          [(match_operand:VNx16BI 1 "register_operand" "Uph")
           (match_operand:DI 2 "const_int_operand")
           (const_int BHSD_BITS)]
          UNSPEC_PEXT))]
  "TARGET_SVE2p1_OR_SME2"
  "pext\t%0.<bits_etype>, %K1[%2]"
)

(define_insn "@aarch64_sve_pext<BHSD_BITS>x2"
  [(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
        (unspec:VNx32BI
          [(match_operand:VNx16BI 1 "register_operand" "Uph")
           (match_operand:DI 2 "const_int_operand")
           (const_int BHSD_BITS)]
          UNSPEC_PEXTx2))]
  "TARGET_SVE2p1_OR_SME2"
  "pext\t{%S0.<bits_etype>, %T0.<bits_etype>}, %K1[%2]"
)
;; -------------------------------------------------------------------------
;; ---- [PRED] Predicate selection
;; -------------------------------------------------------------------------
;; Includes:
;; - PSEL
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_psel<BHSD_BITS>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (unspec:VNx16BI
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand:VNx16BI 2 "register_operand" "Upa")
           (match_operand:SI 3 "register_operand" "Ucj")
           (const_int BHSD_BITS)]
          UNSPEC_PSEL))]
  "TARGET_SVE2p1_OR_SME"
  "psel\t%0, %1, %2.<bits_etype>[%w3, 0]"
)

(define_insn "*aarch64_sve_psel<BHSD_BITS>_plus"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Upa")
        (unspec:VNx16BI
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand:VNx16BI 2 "register_operand" "Upa")
           (plus:SI
             (match_operand:SI 3 "register_operand" "Ucj")
             (match_operand:SI 4 "const_int_operand"))
           (const_int BHSD_BITS)]
          UNSPEC_PSEL))]
  "TARGET_SVE2p1_OR_SME
   && UINTVAL (operands[4]) < 128 / <BHSD_BITS>"
  "psel\t%0, %1, %2.<bits_etype>[%w3, %4]"
)
;; -------------------------------------------------------------------------
;; ---- [PRED] Predicate count
;; -------------------------------------------------------------------------
;; Includes:
;; - CNTP (predicate as counter)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_cntp_c<BHSD_BITS>"
  [(set (match_operand:DI 0 "register_operand" "=r")
        (unspec:DI
          [(match_operand:VNx16BI 1 "register_operand" "Upa")
           (match_operand:DI 2 "const_int_operand")
           (const_int BHSD_BITS)]
          UNSPEC_CNTP_C))]
  "TARGET_SVE2p1_OR_SME2"
  "cntp\t%x0, %K1.<bits_etype>, vlx%2"
)
;; =========================================================================
;; == Uniform unary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [FP] Multi-register unary operations
;; -------------------------------------------------------------------------
;; Includes the multi-register forms of:
;; - FRINTA
;; - FRINTM
;; - FRINTN
;; - FRINTP
;; -------------------------------------------------------------------------
(define_insn "<frint_pattern><mode>2"
  [(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
        (unspec:SVE_SFx24
          [(match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")]
          SVE2_SFx24_UNARY))]
  "TARGET_STREAMING_SME2"
  "frint<frint_suffix>\t%0, %1"
)
;; =========================================================================
;; == Uniform binary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Multi-register operations
;; -------------------------------------------------------------------------
;; Includes the multi-register forms of:
;; - ADD
;; - SMAX
;; - SMIN
;; - SQDMULH
;; - SRSHL
;; - UMAX
;; - UMIN
;; - URSHL
;; -------------------------------------------------------------------------
(define_expand "<optab><mode>3"
  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
        (SVE_INT_BINARY_MULTI:SVE_Ix24
          (match_operand:SVE_Ix24 1 "aligned_register_operand" "Uw<vector_count>")
          (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
  "TARGET_STREAMING_SME2"
)

(define_insn "*<optab><mode>3"
  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
        (SVE_INT_BINARY_MULTI:SVE_Ix24
          (match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
          (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")))]
  "TARGET_STREAMING_SME2"
  "<sve_int_op>\t%0, %0, %2"
)

(define_insn "@aarch64_sve_single_<optab><mode>"
  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
        (SVE_INT_BINARY_SINGLE:SVE_Ix24
          (match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
          (vec_duplicate:SVE_Ix24
            (match_operand:<VSINGLE> 2 "register_operand" "x"))))]
  "TARGET_STREAMING_SME2"
  "<sve_int_op>\t%0, %0, %2.<Vetype>"
)

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
        (unspec:SVE_Ix24
          [(match_operand:SVE_Ix24 1 "aligned_register_operand" "%0")
           (match_operand:SVE_Ix24 2 "aligned_register_operand" "Uw<vector_count>")]
          SVE_INT_BINARY_MULTI))]
  "TARGET_STREAMING_SME2"
  "<sve_int_op>\t%0, %0, %2"
)

(define_insn "@aarch64_sve_single_<sve_int_op><mode>"
  [(set (match_operand:SVE_Ix24 0 "aligned_register_operand" "=Uw<vector_count>")
        (unspec:SVE_Ix24
          [(match_operand:SVE_Ix24 1 "aligned_register_operand" "0")
           (vec_duplicate:SVE_Ix24
             (match_operand:<VSINGLE> 2 "register_operand" "x"))]
          SVE_INT_BINARY_MULTI))]
  "TARGET_STREAMING_SME2"
  "<sve_int_op>\t%0, %0, %2.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Clamp to minimum/maximum
;; -------------------------------------------------------------------------
;; - SCLAMP
;; - UCLAMP
;; -------------------------------------------------------------------------
;; The minimum is applied after the maximum, which matters if the maximum
;; bound is (unexpectedly) less than the minimum bound.
(define_insn "@aarch64_sve_<su>clamp<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (<max_opp>:SVE_FULL_I
          (<max>:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand")
            (match_operand:SVE_FULL_I 2 "register_operand"))
          (match_operand:SVE_FULL_I 3 "register_operand")))]
  "TARGET_SVE2p1_OR_SME"
  {@ [cons: =0, 1, 2, 3; attrs: movprfx]
     [        w, %0, w, w; *   ] <su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
     [      ?&w,  w, w, w; yes ] movprfx\t%0, %1\;<su>clamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
  }
)
(define_insn_and_split "*aarch64_sve_<su>clamp<mode>_x"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand 4)
           (<max_opp>:SVE_FULL_I
             (unspec:SVE_FULL_I
               [(match_operand 5)
                (<max>:SVE_FULL_I
                  (match_operand:SVE_FULL_I 1 "register_operand")
                  (match_operand:SVE_FULL_I 2 "register_operand"))]
               UNSPEC_PRED_X)
             (match_operand:SVE_FULL_I 3 "register_operand"))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2p1_OR_SME"
  {@ [cons: =0, 1, 2, 3; attrs: movprfx]
     [        w, %0, w, w; *   ] #
     [      ?&w,  w, w, w; yes ] #
  }
  "&& true"
  [(set (match_dup 0)
        (<max_opp>:SVE_FULL_I
          (<max>:SVE_FULL_I
            (match_dup 1)
            (match_dup 2))
          (match_dup 3)))]
)
(define_insn "@aarch64_sve_<su>clamp_single<mode>"
  [(set (match_operand:SVE_Ix24 0 "register_operand" "=Uw<vector_count>")
        (<max_opp>:SVE_Ix24
          (<max>:SVE_Ix24
            (match_operand:SVE_Ix24 1 "register_operand" "0")
            (vec_duplicate:SVE_Ix24
              (match_operand:<VSINGLE> 2 "register_operand" "w")))
          (vec_duplicate:SVE_Ix24
            (match_operand:<VSINGLE> 3 "register_operand" "w"))))]
  "TARGET_STREAMING_SME2"
  "<su>clamp\t%0, %2.<Vetype>, %3.<Vetype>"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Multiplication
;; -------------------------------------------------------------------------
;; Includes the lane and unpredicated forms of:
;; - MUL
;; -------------------------------------------------------------------------
(define_insn "@aarch64_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI_SIMD_DI 0 "register_operand" "=w")
        (mult:SVE_FULL_HSDI_SIMD_DI
          (unspec:SVE_FULL_HSDI_SIMD_DI
            [(match_operand:SVE_FULL_HSDI_SIMD_DI 2 "register_operand" "<sve_lane_con>")
             (match_operand:SI 3 "const_int_operand")]
            UNSPEC_SVE_LANE_SELECT)
          (match_operand:SVE_FULL_HSDI_SIMD_DI 1 "register_operand" "w")))]
  "TARGET_SVE2"
  "mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>[%3]"
)
;; The 2nd and 3rd alternatives are valid for just TARGET_SVE as well but
;; we include them here to allow matching simpler, unpredicated RTL.
(define_insn "*aarch64_mul_unpredicated_<mode>"
  [(set (match_operand:SVE_I_SIMD_DI 0 "register_operand")
        (mult:SVE_I_SIMD_DI
          (match_operand:SVE_I_SIMD_DI 1 "register_operand")
          (match_operand:SVE_I_SIMD_DI 2 "aarch64_sve_vsm_operand")))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1 , 2   ; attrs: movprfx ]
     [ w        , w , w   ; *              ] mul\t%Z0.<Vetype>, %Z1.<Vetype>, %Z2.<Vetype>
     [ w        , 0 , vsm ; *              ] mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
     [ ?&w      , w , vsm ; yes            ] movprfx\t%Z0, %Z1\;mul\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
  }
)
;; -------------------------------------------------------------------------
;; ---- [INT] Scaled high-part multiplication
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic.
;; -------------------------------------------------------------------------
;; Unpredicated integer multiply-high-with-(round-and-)scale.
(define_expand "<su>mulh<r>s<mode>3"
  [(set (match_operand:SVE_FULL_BHSI 0 "register_operand")
        (unspec:SVE_FULL_BHSI
          [(match_dup 3)
           (unspec:SVE_FULL_BHSI
             [(match_operand:SVE_FULL_BHSI 1 "register_operand")
              (match_operand:SVE_FULL_BHSI 2 "register_operand")]
             MULHRS)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = aarch64_ptrue_reg (<VPRED>mode);

    rtx prod_b = gen_reg_rtx (<VWIDE>mode);
    rtx prod_t = gen_reg_rtx (<VWIDE>mode);
    emit_insn (gen_aarch64_sve_<su>mullb<Vwide> (prod_b, operands[1],
                                                 operands[2]));
    emit_insn (gen_aarch64_sve_<su>mullt<Vwide> (prod_t, operands[1],
                                                 operands[2]));

    rtx shift = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode) - 1);
    emit_insn (gen_aarch64_sve_<r>shrnb<Vwide> (operands[0], prod_b, shift));
    emit_insn (gen_aarch64_sve_<r>shrnt<Vwide> (operands[0], operands[0],
                                                prod_t, shift));

    DONE;
  }
)
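;; A scalar model of what this expansion computes, for 16-bit elements
;; (illustration only; MULLB/MULLT form the even/odd double-width products
;; and SHRNB/SHRNT write the even/odd result lanes):
;;
;;   #include <stdint.h>
;;
;;   static int16_t
;;   smulhs (int16_t a, int16_t b)   /* <r> absent: truncating scale */
;;   {
;;     return (int16_t) (((int32_t) a * b) >> 15);
;;   }
;;
;;   static int16_t
;;   smulhrs (int16_t a, int16_t b)  /* <r> = r: rounding scale */
;;   {
;;     return (int16_t) ((((int32_t) a * b) + (1 << 14)) >> 15);
;;   }
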
;; -------------------------------------------------------------------------
;; ---- [INT] General binary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - SHADD
;; - SHSUB
;; - SHSUBR
;; - SQRSHL
;; - SQRSHLR
;; - SRHADD
;; - SRSHL
;; - SRSHLR
;; - SUQADD
;; - UHADD
;; - UHSUB
;; - UHSUBR
;; - UQRSHL
;; - UQRSHLR
;; - URHADD
;; - URSHL
;; - URSHLR
;; - USQADD
;; -------------------------------------------------------------------------
;; Integer average (floor).
(define_expand "<u>avg<mode>3_floor"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 3)
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand")]
             HADD)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
;; Integer average (rounding).
(define_expand "<u>avg<mode>3_ceil"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 3)
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand")]
             RHADD)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[3] = force_reg (<VPRED>mode, CONSTM1_RTX (<VPRED>mode));
  }
)
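;; Scalar equivalents of the two expansions above (illustration only; the
;; halving-add instructions avoid the widening internally):
;;
;;   #include <stdint.h>
;;
;;   static int8_t
;;   savg_floor (int8_t a, int8_t b)  /* SHADD */
;;   {
;;     return (int8_t) (((int16_t) a + b) >> 1);
;;   }
;;
;;   static int8_t
;;   savg_ceil (int8_t a, int8_t b)   /* SRHADD */
;;   {
;;     return (int8_t) (((int16_t) a + b + 1) >> 1);
;;   }
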
;; The immediate form of SQADD acts as an immediate form of SUQADD
;; over its full range.  In contrast to the ss_plus pattern, we do
;; not need to treat byte immediates specially.  E.g.:
;;
;;      SQADD   Z0.B, Z0.B, #128
;;
;; is equivalent to:
;;
;;      MOV     Z1.B, #128
;;      SUQADD  Z0.B, P0/M, Z0.B, Z1.B
;;
;; even though it's not equivalent to:
;;
;;      MOV     Z1.B, #128
;;      SQADD   Z0.B, P0/M, Z0.B, Z1.B  // Saturating subtraction of 128
(define_insn "@aarch64_sve_suqadd<mode>_const"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "0, w")
           (match_operand:SVE_FULL_I 2 "aarch64_sve_arith_immediate")]
          UNSPEC_SUQADD))]
  "TARGET_SVE2"
  "@
   sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2
   movprfx\t%0, %1\;sqadd\t%0.<Vetype>, %0.<Vetype>, #%D2"
  [(set_attr "movprfx" "*,yes")]
)
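;; In scalar terms, for byte elements (illustration only): the immediate is
;; zero-extended, so the sum can never underflow and only the upper bound
;; needs saturating, which is exactly SUQADD's behaviour:
;;
;;   #include <stdint.h>
;;
;;   static int8_t
;;   suqadd_imm (int8_t x, uint8_t imm)
;;   {
;;     int sum = (int) x + (int) imm;  /* x >= -128 and imm >= 0 */
;;     return sum > 127 ? 127 : (int8_t) sum;
;;   }
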
;; General predicated binary arithmetic.  All operations handled here
;; are commutative or have a reversed form.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 2 "register_operand")
              (match_operand:SVE_FULL_I 3 "register_operand")]
             SVE2_COND_INT_BINARY_REV)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
     [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ w        , Upl , w , 0 ; *              ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)
;; Predicated binary arithmetic with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_dup 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand")]
                SVE2_COND_INT_BINARY)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Predicated binary arithmetic, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand")]
                SVE2_COND_INT_BINARY)]
             UNSPEC_PRED_X)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
     [ w        , Upl , 0 , w ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Predicated binary arithmetic, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand")]
                SVE2_COND_INT_BINARY_REV)]
             UNSPEC_PRED_X)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
     [ w        , Upl , w , 0 ; *              ] <sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Predicated binary operations, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand")]
                SVE2_COND_INT_BINARY_REV)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && !rtx_equal_p (operands[3], operands[4])"
  {@ [ cons: =0 , 1   , 2 , 3 , 4  ]
     [ &w       , Upl , 0 , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w       , Upl , w , 0 , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ &w       , Upl , w , w , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w       , Upl , w , w , 0  ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w      , Upl , w , w , w  ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
;; Predicated binary operations with no reverse form, merging with zero.
;; At present we don't generate these patterns via a cond_* optab,
;; so there's no correctness requirement to handle merging with an
;; independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand")]
                SVE2_COND_INT_BINARY_NOREV)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_imm_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1   , 2 , 3 ]
     [ &w       , Upl , 0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w       , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
  "&& !CONSTANT_P (operands[5])"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
  [(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [INT] Saturating binary arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - SQDMULH
;; - SQRDMULH
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "w")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")]
          SVE2_INT_BINARY))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
           (unspec:SVE_FULL_HSDI
             [(match_operand:SVE_FULL_HSDI 2 "register_operand" "<sve_lane_con>")
              (match_operand:SI 3 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)]
          SVE2_INT_BINARY_LANE))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>[%3]"
)
;; -------------------------------------------------------------------------
;; ---- [INT] Saturating left shifts
;; -------------------------------------------------------------------------
;; Includes:
;; - SQSHL
;; - SQSHLR
;; - UQSHL
;; - UQSHLR
;; -------------------------------------------------------------------------
;; Predicated left shifts.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand:SVE_FULL_I 2 "register_operand")
              (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
             SVE2_COND_INT_SHIFT)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1   , 2 , 3      ; attrs: movprfx ]
     [ w        , Upl , 0 , D<lr>  ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w        , Upl , 0 , w      ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ w        , Upl , w , 0      ; *              ] <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w      , Upl , w , D<lr>  ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?&w      , Upl , w , w      ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
)
;; Predicated left shifts with merging.
(define_expand "@cond_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_dup 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "register_operand")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {
    operands[5] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Predicated left shifts, merging with the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_dup 2)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1   , 2 , 3     ; attrs: movprfx ]
     [ w        , Upl , 0 , D<lr> ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ w        , Upl , 0 , w     ; *              ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w      , Upl , w , D<lr> ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ ?&w      , Upl , w , w     ; yes            ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Predicated left shifts, merging with the second input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_3"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand 4)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "register_operand")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_dup 3)]
          UNSPEC_SEL))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
     [ w        , Upl , w , 0 ; *              ] <sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Predicated left shifts, merging with an independent value.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_any"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:SVE_FULL_I
             [(match_operand 5)
              (unspec:SVE_FULL_I
                [(match_operand:SVE_FULL_I 2 "register_operand")
                 (match_operand:SVE_FULL_I 3 "aarch64_sve_<lr>shift_operand")]
                SVE2_COND_INT_SHIFT)]
             UNSPEC_PRED_X)
           (match_operand:SVE_FULL_I 4 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2
   && !rtx_equal_p (operands[2], operands[4])
   && (CONSTANT_P (operands[4]) || !rtx_equal_p (operands[3], operands[4]))"
  {@ [ cons: =0 , 1   , 2 , 3     , 4  ]
     [ &w       , Upl , 0 , D<lr> , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ &w       , Upl , 0 , w     , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w       , Upl , w , 0     , Dz ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>r\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ &w       , Upl , w , D<lr> , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ &w       , Upl , w , w     , Dz ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ &w       , Upl , w , D<lr> , 0  ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, #%3
     [ &w       , Upl , w , w     , 0  ] movprfx\t%0.<Vetype>, %1/m, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ ?&w      , Upl , w , D<lr> , w  ] #
     [ ?&w      , Upl , w , w     , w  ] #
  }
  "&& 1"
  {
    if (reload_completed
        && register_operand (operands[4], <MODE>mode)
        && !rtx_equal_p (operands[0], operands[4]))
      {
        emit_insn (gen_vcond_mask_<mode><vpred> (operands[0], operands[2],
                                                 operands[4], operands[1]));
        operands[4] = operands[2] = operands[0];
      }
    else if (!CONSTANT_P (operands[5]))
      operands[5] = CONSTM1_RTX (<VPRED>mode);
    else
      FAIL;
  }
  [(set_attr "movprfx" "yes")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] Non-widening bfloat16 arithmetic
;; -------------------------------------------------------------------------
;; Includes:
;; - BFADD (SVE_B16B16)
;; - BFMAX (SVE_B16B16)
;; - BFMAXNM (SVE_B16B16)
;; - BFMIN (SVE_B16B16)
;; - BFMINNM (SVE_B16B16)
;; - BFMUL (SVE_B16B16)
;; -------------------------------------------------------------------------
;; Predicated B16B16 binary operations.
(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:VNx8BF_ONLY 0 "register_operand")
        (unspec:VNx8BF_ONLY
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 4 "aarch64_sve_gp_strictness")
           (match_operand:VNx8BF_ONLY 2 "register_operand")
           (match_operand:VNx8BF_ONLY 3 "register_operand")]
          SVE_COND_FP_BINARY_OPTAB))]
  "TARGET_SSVE_B16B16 && <supports_bf16>"
  {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx , is_rev ]
     [ w        , Upl , 0 , w ; *              , *      ] <b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
     [ w        , Upl , w , 0 ; *              , true   ] <b><sve_fp_op_rev>\t%0.<Vetype>, %1/m, %0.<Vetype>, %2.<Vetype>
     [ ?&w      , Upl , w , w ; yes            , *      ] movprfx\t%0, %2\;<b><sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
  }
  [(set_attr "is_bf16" "<is_bf16>")
   (set_attr "supports_bf16_rev" "<supports_bf16_rev>")]
)
;; -------------------------------------------------------------------------
;; ---- [FP] Clamp to minimum/maximum
;; -------------------------------------------------------------------------
;; - BFCLAMP (SVE_B16B16)
;; - FCLAMP
;; -------------------------------------------------------------------------
;; The minimum is applied after the maximum, which matters if the maximum
;; bound is (unexpectedly) less than the minimum bound.
(define_insn "@aarch64_sve_fclamp<mode>"
  [(set (match_operand:SVE_CLAMP_F 0 "register_operand")
        (unspec:SVE_CLAMP_F
          [(unspec:SVE_CLAMP_F
             [(match_operand:SVE_CLAMP_F 1 "register_operand")
              (match_operand:SVE_CLAMP_F 2 "register_operand")]
             UNSPEC_FMAXNM)
           (match_operand:SVE_CLAMP_F 3 "register_operand")]
          UNSPEC_FMINNM))]
  "TARGET_SVE2p1_OR_SME2"
  {@ [cons: =0, 1, 2, 3; attrs: movprfx]
     [        w, %0, w, w; *   ] <b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
     [      ?&w,  w, w, w; yes ] movprfx\t%0, %1\;<b>fclamp\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
  }
)
(define_insn_and_split "*aarch64_sve_fclamp<mode>_x"
  [(set (match_operand:SVE_CLAMP_F 0 "register_operand")
        (unspec:SVE_CLAMP_F
          [(match_operand 4)
           (const_int SVE_RELAXED_GP)
           (unspec:SVE_CLAMP_F
             [(match_operand 5)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_CLAMP_F 1 "register_operand")
              (match_operand:SVE_CLAMP_F 2 "register_operand")]
             UNSPEC_COND_FMAXNM)
           (match_operand:SVE_CLAMP_F 3 "register_operand")]
          UNSPEC_COND_FMINNM))]
  "TARGET_SVE2p1_OR_SME2"
  {@ [cons: =0, 1, 2, 3; attrs: movprfx]
     [        w, %0, w, w; *   ] #
     [      ?&w,  w, w, w; yes ] #
  }
  "&& true"
  [(set (match_dup 0)
        (unspec:SVE_CLAMP_F
          [(unspec:SVE_CLAMP_F
             [(match_dup 1)
              (match_dup 2)]
             UNSPEC_FMAXNM)
           (match_dup 3)]
          UNSPEC_FMINNM))]
)
(define_insn "@aarch64_sve_fclamp_single<mode>"
  [(set (match_operand:SVE_Fx24 0 "register_operand" "=Uw<vector_count>")
        (unspec:SVE_Fx24
          [(unspec:SVE_Fx24
             [(match_operand:SVE_Fx24 1 "register_operand" "0")
              (vec_duplicate:SVE_Fx24
                (match_operand:<VSINGLE> 2 "register_operand" "w"))]
             UNSPEC_FMAXNM)
           (vec_duplicate:SVE_Fx24
             (match_operand:<VSINGLE> 3 "register_operand" "w"))]
          UNSPEC_FMINNM))]
  "TARGET_STREAMING_SME2"
  "<b>fclamp\t%0, %2.<Vetype>, %3.<Vetype>"
)
;; =========================================================================
;; == Uniform ternary arithmetic
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] General ternary arithmetic that maps to unspecs
;; -------------------------------------------------------------------------
;; Includes:
;; - ADCLB
;; - ADCLT
;; - EORBT
;; - EORTB
;; - SBCLB
;; - SBCLT
;; - SQRDMLAH
;; - SQRDMLSH
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 2 "register_operand")
           (match_operand:SVE_FULL_I 3 "register_operand")
           (match_operand:SVE_FULL_I 1 "register_operand")]
          SVE2_INT_TERNARY))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w        , 0 , w , w ; *              ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
  }
)

(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
        (unspec:SVE_FULL_HSDI
          [(match_operand:SVE_FULL_HSDI 2 "register_operand")
           (unspec:SVE_FULL_HSDI
             [(match_operand:SVE_FULL_HSDI 3 "register_operand")
              (match_operand:SI 4 "const_int_operand")]
             UNSPEC_SVE_LANE_SELECT)
           (match_operand:SVE_FULL_HSDI 1 "register_operand")]
          SVE2_INT_TERNARY_LANE))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
     [ w        , 0 , w , <sve_lane_con> ; *              ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
     [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
  }
)
;; -------------------------------------------------------------------------
;; ---- [INT] Multiply-and-accumulate operations
;; -------------------------------------------------------------------------
;; Includes the lane forms of:
;; - MLA
;; - MLS
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_add_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
        (plus:SVE_FULL_HSDI
          (mult:SVE_FULL_HSDI
            (unspec:SVE_FULL_HSDI
              [(match_operand:SVE_FULL_HSDI 3 "register_operand")
               (match_operand:SI 4 "const_int_operand")]
              UNSPEC_SVE_LANE_SELECT)
            (match_operand:SVE_FULL_HSDI 2 "register_operand"))
          (match_operand:SVE_FULL_HSDI 1 "register_operand")))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
     [ w        , 0 , w , <sve_lane_con> ; *              ] mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
     [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;mla\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
  }
)

(define_insn "@aarch64_sve_sub_mul_lane_<mode>"
  [(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
        (minus:SVE_FULL_HSDI
          (match_operand:SVE_FULL_HSDI 1 "register_operand")
          (mult:SVE_FULL_HSDI
            (unspec:SVE_FULL_HSDI
              [(match_operand:SVE_FULL_HSDI 3 "register_operand")
               (match_operand:SI 4 "const_int_operand")]
              UNSPEC_SVE_LANE_SELECT)
            (match_operand:SVE_FULL_HSDI 2 "register_operand"))))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1 , 2 , 3              ; attrs: movprfx ]
     [ w        , 0 , w , <sve_lane_con> ; *              ] mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
     [ ?&w      , w , w , <sve_lane_con> ; yes            ] movprfx\t%0, %1\;mls\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4]
  }
)
;; -------------------------------------------------------------------------
;; ---- [INT] Binary logic operations with rotation
;; -------------------------------------------------------------------------
;; Includes:
;; - XAR
;; -------------------------------------------------------------------------
;; Also allow the Advanced SIMD modes as the SVE2 XAR instruction
;; can handle more element sizes than the TARGET_SHA3 one from Advanced SIMD.
;; Don't allow the V2DImode use here unless !TARGET_SHA3 as the Advanced SIMD
;; version should be preferred when available as it is non-destructive on its
;; input.
(define_insn "@aarch64_sve2_xar<mode>"
  [(set (match_operand:SVE_ASIMD_FULL_I 0 "register_operand" "=w,?&w")
        (rotate:SVE_ASIMD_FULL_I
          (xor:SVE_ASIMD_FULL_I
            (match_operand:SVE_ASIMD_FULL_I 1 "register_operand" "%0,w")
            (match_operand:SVE_ASIMD_FULL_I 2 "register_operand" "w,w"))
          (match_operand:SVE_ASIMD_FULL_I 3 "aarch64_simd_lshift_imm")))]
  "TARGET_SVE2 && !(<MODE>mode == V2DImode && TARGET_SHA3)"
  {
    operands[3]
      = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)
                 - INTVAL (unwrap_const_vec_duplicate (operands[3])));
    if (which_alternative == 0)
      return "xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
    return "movprfx\t%Z0, %Z1\;xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
  }
  [(set_attr "movprfx" "*,yes")]
)
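;; A scalar model of XAR and of the immediate rewrite above, for byte
;; elements (illustration only): the RTL matches a left rotate, while the
;; instruction rotates right, so the amount is rewritten as esize - amount.
;;
;;   #include <stdint.h>
;;
;;   static uint8_t
;;   xar_b (uint8_t a, uint8_t b, unsigned lrot)   /* 1 <= lrot <= 7 */
;;   {
;;     uint8_t x = a ^ b;
;;     unsigned rrot = 8 - lrot;                   /* as computed above */
;;     return (uint8_t) ((x >> rrot) | (x << (8 - rrot)));
;;   }
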
;; -------------------------------------------------------------------------
;; ---- [INT] Ternary logic operations
;; -------------------------------------------------------------------------
;; Includes:
;; - BCAX
;; - BSL
;; - BSL1N
;; - BSL2N
;; - EOR3
;; - NBSL
;; -------------------------------------------------------------------------
;; Unpredicated exclusive OR of AND.
(define_expand "@aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand"))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 2 "register_operand"))
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)

(define_insn_and_rewrite "*aarch64_sve2_bcax<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_operand 4)
               (not:SVE_FULL_I
                 (match_operand:SVE_FULL_I 3 "register_operand"))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 2 "register_operand"))
          (match_operand:SVE_FULL_I 1 "register_operand")))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w        , 0 , w , w ; *              ] bcax\t%0.d, %0.d, %2.d, %3.d
     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;bcax\t%0.d, %0.d, %2.d, %3.d
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Unpredicated 3-way exclusive OR.
(define_insn "@aarch64_sve2_eor3<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (xor:SVE_FULL_I
            (match_operand:SVE_FULL_I 1 "register_operand")
            (match_operand:SVE_FULL_I 2 "register_operand"))
          (match_operand:SVE_FULL_I 3 "register_operand")))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
     [ w        , 0 , w , w ; *              ] eor3\t%0.d, %0.d, %2.d, %3.d
     [ w        , w , 0 , w ; *              ] eor3\t%0.d, %0.d, %1.d, %3.d
     [ w        , w , w , 0 ; *              ] eor3\t%0.d, %0.d, %1.d, %2.d
     [ ?&w      , w , w , w ; yes            ] movprfx\t%0, %1\;eor3\t%0.d, %0.d, %2.d, %3.d
  }
)
;; Use NBSL for vector NOR.
(define_insn_and_rewrite "*aarch64_sve2_nor<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand 3)
           (and:SVE_FULL_I
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 1 "register_operand"))
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand")))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1  , 2 ; attrs: movprfx ]
     [ w        , %0 , w ; *              ] nbsl\t%0.d, %0.d, %2.d, %0.d
     [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %0.d
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)

;; Use NBSL for vector NAND.
(define_insn_and_rewrite "*aarch64_sve2_nand<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand 3)
           (ior:SVE_FULL_I
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 1 "register_operand"))
             (not:SVE_FULL_I
               (match_operand:SVE_FULL_I 2 "register_operand")))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1  , 2 ; attrs: movprfx ]
     [ w        , %0 , w ; *              ] nbsl\t%0.d, %0.d, %2.d, %2.d
     [ ?&w      , w  , w ; yes            ] movprfx\t%0, %1\;nbsl\t%0.d, %0.d, %2.d, %2.d
  }
  "&& !CONSTANT_P (operands[3])"
  {
    operands[3] = CONSTM1_RTX (<VPRED>mode);
  }
)
;; Unpredicated bitwise select.
;; (op3 ? bsl_mov : bsl_dup) == (((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (xor:SVE_FULL_I
              (match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand"))
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE2"
)
(define_insn "*aarch64_sve2_bsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (xor:SVE_FULL_I
              (match_operand:SVE_FULL_I 1 "register_operand")
              (match_operand:SVE_FULL_I 2 "register_operand"))
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup BSL_DUP)))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
     [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
     [ ?&w      , w         , w         , w ; yes            ] movprfx\t%0, %<bsl_mov>\;bsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
  }
)
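;; The identity quoted above, checked in scalar form (illustration only):
;; each result bit is taken from BSL_MOV where the selector bit is set and
;; from BSL_DUP where it is clear.
;;
;;   #include <stdint.h>
;;
;;   static uint64_t
;;   bsl (uint64_t mov, uint64_t dup, uint64_t sel)
;;   {
;;     return ((mov ^ dup) & sel) ^ dup;   /* sel ? mov : dup, bitwise */
;;   }
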
;; Unpredicated bitwise inverted select.
;; (~(op3 ? bsl_mov : bsl_dup)) == (~(((bsl_mov ^ bsl_dup) & op3) ^ bsl_dup))
(define_expand "@aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_dup 4)
           (not:SVE_FULL_I
             (xor:SVE_FULL_I
               (and:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand"))
                 (match_operand:SVE_FULL_I 3 "register_operand"))
               (match_dup 2)))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
(define_insn_and_rewrite "*aarch64_sve2_nbsl<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (unspec:SVE_FULL_I
          [(match_operand 4)
           (not:SVE_FULL_I
             (xor:SVE_FULL_I
               (and:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand"))
                 (match_operand:SVE_FULL_I 3 "register_operand"))
               (match_dup BSL_DUP)))]
          UNSPEC_PRED_X))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
     [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
     [ ?&w      , w         , w         , w ; yes            ] movprfx\t%0, %<bsl_mov>\;nbsl\t%0.d, %0.d, %<bsl_dup>.d, %3.d
  }
  "&& !CONSTANT_P (operands[4])"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
(define_insn "*aarch64_sve2_nbsl_unpred<mode>"
  [(set (match_operand:VDQ_I 0 "register_operand")
        (not:VDQ_I
          (xor:VDQ_I
            (and:VDQ_I
              (xor:VDQ_I
                (match_operand:VDQ_I 1 "register_operand")
                (match_operand:VDQ_I 2 "register_operand"))
              (match_operand:VDQ_I 3 "register_operand"))
            (match_dup BSL_DUP))))]
  "TARGET_SVE2"
  {@ [ cons: =0 , 1         , 2         , 3 ; attrs: movprfx ]
     [ w        , <bsl_1st> , <bsl_2nd> , w ; *              ] nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
     [ ?&w      , w         , w         , w ; yes            ] movprfx\t%Z0, %Z<bsl_mov>\;nbsl\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
  }
)
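;; The NBSL form is simply the bitwise inverse of the BSL identity above
;; (illustration only):
;;
;;   #include <stdint.h>
;;
;;   static uint64_t
;;   nbsl (uint64_t mov, uint64_t dup, uint64_t sel)
;;   {
;;     return ~(((mov ^ dup) & sel) ^ dup);   /* ~(sel ? mov : dup) */
;;   }
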
;; Unpredicated bitwise select with inverted first operand.
;; (op3 ? ~bsl_mov : bsl_dup) == ((~(bsl_mov ^ bsl_dup) & op3) ^ bsl_dup)
(define_expand "@aarch64_sve2_bsl1n<mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand")
        (xor:SVE_FULL_I
          (and:SVE_FULL_I
            (unspec:SVE_FULL_I
              [(match_dup 4)
               (not:SVE_FULL_I
                 (xor:SVE_FULL_I
                   (match_operand:SVE_FULL_I 1 "register_operand")
                   (match_operand:SVE_FULL_I 2 "register_operand")))]
              UNSPEC_PRED_X)
            (match_operand:SVE_FULL_I 3 "register_operand"))
          (match_dup 2)))]
  "TARGET_SVE2"
  {
    operands[4] = CONSTM1_RTX (<VPRED>mode);
  }
)
1774 (define_insn_and_rewrite "*aarch64_sve2_bsl1n<mode>"
1775 [(set (match_operand:SVE_FULL_I 0 "register_operand")
1782 (match_operand:SVE_FULL_I 1 "register_operand")
1783 (match_operand:SVE_FULL_I 2 "register_operand")))]
1785 (match_operand:SVE_FULL_I 3 "register_operand"))
1786 (match_dup BSL_DUP)))]
1788 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1789 [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
1790 [ ?&w , w , w , w ; yes ] movprfx\t%0, %<bsl_mov>\;bsl1n\t%0.d, %0.d, %<bsl_dup>.d, %3.d
1792 "&& !CONSTANT_P (operands[4])"
1794 operands[4] = CONSTM1_RTX (<VPRED>mode);
1798 (define_insn "*aarch64_sve2_bsl1n_unpred<mode>"
1799 [(set (match_operand:VDQ_I 0 "register_operand")
1804 (match_operand:VDQ_I 1 "register_operand")
1805 (match_operand:VDQ_I 2 "register_operand")))
1806 (match_operand:VDQ_I 3 "register_operand"))
1807 (match_dup BSL_DUP)))]
1809 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1810 [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1811 [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl1n\t%Z0.d, %Z0.d, %Z<bsl_dup>.d, %Z3.d
1815 ;; Unpredicated bitwise select with inverted second operand.
1816 ;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~op3 & ~bsl_dup))
1817 (define_expand "@aarch64_sve2_bsl2n<mode>"
1818 [(set (match_operand:SVE_FULL_I 0 "register_operand")
1821 (match_operand:SVE_FULL_I 1 "register_operand")
1822 (match_operand:SVE_FULL_I 3 "register_operand"))
1827 (match_operand:SVE_FULL_I 2 "register_operand"))
1833 operands[4] = CONSTM1_RTX (<VPRED>mode);
1837 (define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
1838 [(set (match_operand:SVE_FULL_I 0 "register_operand")
1841 (match_operand:SVE_FULL_I 1 "register_operand")
1842 (match_operand:SVE_FULL_I 2 "register_operand"))
1847 (match_operand:SVE_FULL_I 3 "register_operand"))
1849 (match_dup BSL_DUP)))]
1852 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1853 [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
1854 [ ?&w , w , w , w ; yes ] movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
1856 "&& !CONSTANT_P (operands[4])"
1858 operands[4] = CONSTM1_RTX (<VPRED>mode);
1862 ;; Unpredicated bitwise select with inverted second operand, alternative form.
1863 ;; (bsl_dup ? bsl_mov : ~op3) == ((bsl_dup & bsl_mov) | (~bsl_dup & ~op3))
1864 (define_insn_and_rewrite "*aarch64_sve2_bsl2n<mode>"
1865 [(set (match_operand:SVE_FULL_I 0 "register_operand")
1868 (match_operand:SVE_FULL_I 1 "register_operand")
1869 (match_operand:SVE_FULL_I 2 "register_operand"))
1874 (match_dup BSL_DUP))
1876 (match_operand:SVE_FULL_I 3 "register_operand")))]
1879 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1880 [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
1881 [ ?&w , w , w , w ; yes ] movprfx\t%0, %<bsl_mov>\;bsl2n\t%0.d, %0.d, %3.d, %<bsl_dup>.d
1883 "&& !CONSTANT_P (operands[4])"
1885 operands[4] = CONSTM1_RTX (<VPRED>mode);
1889 (define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
1890 [(set (match_operand:VDQ_I 0 "register_operand")
1893 (match_operand:VDQ_I 1 "register_operand")
1894 (match_operand:VDQ_I 2 "register_operand"))
1896 (not:VDQ_I (match_operand:VDQ_I 3 "register_operand"))
1897 (not:VDQ_I (match_dup BSL_DUP)))))]
1899 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1900 [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1901 [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1905 (define_insn "*aarch64_sve2_bsl2n_unpred<mode>"
1906 [(set (match_operand:VDQ_I 0 "register_operand")
1909 (match_operand:VDQ_I 1 "register_operand")
1910 (match_operand:VDQ_I 2 "register_operand"))
1912 (not:VDQ_I (match_dup BSL_DUP))
1913 (not:VDQ_I (match_operand:VDQ_I 3 "register_operand")))))]
1915 {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
1916 [ w , <bsl_1st> , <bsl_2nd> , w ; * ] bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1917 [ ?&w , w , w , w ; yes ] movprfx\t%Z0, %Z<bsl_mov>\;bsl2n\t%Z0.d, %Z0.d, %Z3.d, %Z<bsl_dup>.d
1921 ;; -------------------------------------------------------------------------
1922 ;; ---- [INT] Shift-and-accumulate operations
1923 ;; -------------------------------------------------------------------------
1929 ;; -------------------------------------------------------------------------
1931 ;; Provide the natural unpredicated interface for SSRA and USRA.
1932 (define_expand "@aarch64_sve_add_<sve_int_op><mode>"
1933 [(set (match_operand:SVE_FULL_I 0 "register_operand")
1938 (match_operand:SVE_FULL_I 2 "register_operand")
1939 (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
1941 (match_operand:SVE_FULL_I 1 "register_operand")))]
1944 operands[4] = CONSTM1_RTX (<VPRED>mode);
;; Pattern-match SSRA and USRA as a predicated operation whose predicate
;; isn't needed.
(define_insn_and_rewrite "*aarch64_sve2_sra<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm"))]
(match_operand:SVE_FULL_I 1 "register_operand")))]
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , 0 , w ; * ] <sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;<sra_op>sra\t%0.<Vetype>, %2.<Vetype>, #%3
"&& !CONSTANT_P (operands[4])"
operands[4] = CONSTM1_RTX (<VPRED>mode);
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
[(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")]
(match_operand:SVE_FULL_I 1 "register_operand")))]
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , 0 , w ; * ] <sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;<sur>sra\t%0.<Vetype>, %2.<Vetype>, #%3
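;; Per element, both forms above compute a shift followed by an accumulate
;; (illustrative C for the signed case; USRA uses a logical shift instead):
;;
;;   int32_t ssra (int32_t acc, int32_t x, unsigned int shift)
;;   {
;;     return acc + (x >> shift);  /* arithmetic shift right, then add */
;;   }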
;; -------------------------------------------------------------------------
;; ---- [INT] Shift-and-insert operations
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
[(match_operand:SVE_FULL_I 1 "register_operand" "0")
(match_operand:SVE_FULL_I 2 "register_operand" "w")
(match_operand:SVE_FULL_I 3 "aarch64_simd_<lr>shift_imm")]
SVE2_INT_SHIFT_INSERT))]
"<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, #%3"
;; -------------------------------------------------------------------------
;; ---- [INT] Sum of absolute differences
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; Provide the natural unpredicated interface for SABA and UABA.
(define_expand "@aarch64_sve2_<su>aba<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand" "=w, ?&w")
(match_operand:SVE_FULL_I 2 "register_operand" "w, w")
(match_operand:SVE_FULL_I 3 "register_operand" "w, w"))]
(<max_opp>:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand" "0, w")))]
operands[4] = CONSTM1_RTX (<VPRED>mode);
;; Pattern-match SABA and UABA as an absolute-difference-and-accumulate
;; operation whose predicates aren't needed.
(define_insn "*aarch64_sve2_<su>aba<mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand"))]
(<max_opp>:SVE_FULL_I
(match_operand:SVE_FULL_I 1 "register_operand")))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<su>aba\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>
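;; Per element, SABA/UABA accumulate an absolute difference (illustrative C
;; for the unsigned case):
;;
;;   uint32_t uaba (uint32_t acc, uint32_t a, uint32_t b)
;;   {
;;     return acc + (a > b ? a - b : b - a);  /* acc + |a - b| */
;;   }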
;; -------------------------------------------------------------------------
;; ---- [FP] Mfloat8 Multiply-and-accumulate operations
;; -------------------------------------------------------------------------
;; - FMLALB (vectors, FP8 to FP16) (FP8FMA)
;; - FMLALT (vectors, FP8 to FP16) (FP8FMA)
;; - FMLALB (indexed, FP8 to FP16) (FP8FMA)
;; - FMLALT (indexed, FP8 to FP16) (FP8FMA)
;; - FMLALLBB (vectors) (FP8FMA)
;; - FMLALLBB (indexed) (FP8FMA)
;; - FMLALLBT (vectors) (FP8FMA)
;; - FMLALLBT (indexed) (FP8FMA)
;; - FMLALLTB (vectors) (FP8FMA)
;; - FMLALLTB (indexed) (FP8FMA)
;; - FMLALLTT (vectors) (FP8FMA)
;; - FMLALLTT (indexed) (FP8FMA)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_add_<insn><mode>"
[(set (match_operand:VNx8HF_ONLY 0 "register_operand")
[(match_operand:VNx8HF 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(reg:DI FPM_REGNUM)]
"TARGET_SSVE_FP8FMA"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <insn>\t%0.h, %2.b, %3.b
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b
(define_insn "@aarch64_sve_add_<insn><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
[(match_operand:VNx4SF 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(reg:DI FPM_REGNUM)]
"TARGET_SSVE_FP8FMA"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <insn>\t%0.s, %2.b, %3.b
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b
(define_insn "@aarch64_sve_add_lane_<insn><mode>"
[(set (match_operand:VNx8HF_ONLY 0 "register_operand")
[(match_operand:VNx8HF 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(match_operand:SI 4 "const_int_operand")
(reg:DI FPM_REGNUM)]
"TARGET_SSVE_FP8FMA"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , y ; * ] <insn>\t%0.h, %2.b, %3.b[%4]
[ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<insn>\t%0.h, %2.b, %3.b[%4]
(define_insn "@aarch64_sve_add_lane_<insn><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
[(match_operand:VNx4SF 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(match_operand:SI 4 "const_int_operand")
(reg:DI FPM_REGNUM)]
"TARGET_SSVE_FP8FMA"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , y ; * ] <insn>\t%0.s, %2.b, %3.b[%4]
[ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;<insn>\t%0.s, %2.b, %3.b[%4]
;; -------------------------------------------------------------------------
;; ---- [FP] Mfloat8 dot products
;; -------------------------------------------------------------------------
;; - FDOT (4-way, vectors) (FP8DOT4)
;; - FDOT (4-way, indexed) (FP8DOT4)
;; - FDOT (2-way, vectors) (FP8DOT2)
;; - FDOT (2-way, indexed) (FP8DOT2)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_dot<mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand")
(unspec:SVE_FULL_HSF
[(match_operand:SVE_FULL_HSF 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(reg:DI FPM_REGNUM)]
"TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] fdot\t%0.<Vetype>, %2.b, %3.b
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b
(define_insn "@aarch64_sve_dot_lane<mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand")
(unspec:SVE_FULL_HSF
[(match_operand:SVE_FULL_HSF 1 "register_operand")
(match_operand:VNx16QI 2 "register_operand")
(match_operand:VNx16QI 3 "register_operand")
(match_operand:SI 4 "const_int_operand")
(reg:DI FPM_REGNUM)]
UNSPEC_DOT_LANE_FP8))]
"TARGET_SSVE_FP8DOT4 && !(<MODE>mode == VNx8HFmode && !TARGET_SSVE_FP8DOT2)"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , y ; * ] fdot\t%0.<Vetype>, %2.b, %3.b[%4]
[ ?&w , w , w , y ; yes ] movprfx\t%0, %1\;fdot\t%0.<Vetype>, %2.b, %3.b[%4]
;; =========================================================================
;; == Extending arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Multi-register widening conversions
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "<optab><mode><v2xwide>2"
[(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw2")
(ANY_EXTEND:<V2XWIDE>
(match_operand:SVE_FULL_BHSI 1 "register_operand" "w")))]
"TARGET_STREAMING_SME2"
"<su>unpk\t%0, %1.<Vetype>"
(define_insn "<optab><mode><v2xwide>2"
[(set (match_operand:<V2XWIDE> 0 "aligned_register_operand" "=Uw4")
(ANY_EXTEND:<V2XWIDE>
(match_operand:SVE_FULL_BHSIx2 1 "aligned_register_operand" "Uw2")))]
"TARGET_STREAMING_SME2"
;; -------------------------------------------------------------------------
;; ---- [INT] Wide binary arithmetic
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_INT_BINARY_WIDE))]
"<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Ventype>"
;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 1 "register_operand" "w")
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_INT_BINARY_LONG))]
"<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
(define_insn "@aarch64_sve_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 1 "register_operand" "w")
[(match_operand:<VNARROW> 2 "register_operand" "<sve_lane_con>")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_BINARY_LONG_LANE))]
"<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]"
;; -------------------------------------------------------------------------
;; ---- [INT] Long left shifts
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 1 "register_operand" "w")
(match_operand:DI 2 "const_int_operand")]
SVE2_INT_SHIFT_IMM_LONG))]
"<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, #%2"
;; -------------------------------------------------------------------------
;; ---- [INT] Long binary arithmetic with accumulation
;; -------------------------------------------------------------------------
;; - SDOT (SME2 or SVE2p1)
;; - UDOT (SME2 or SVE2p1)
;; -------------------------------------------------------------------------
;; Non-saturating MLA operations.
(define_insn "@aarch64_sve_add_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 2 "register_operand")
(match_operand:<VNARROW> 3 "register_operand")]
SVE2_INT_ADD_BINARY_LONG)
(match_operand:SVE_FULL_HSDI 1 "register_operand")))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
;; Non-saturating MLA operations with lane select.
(define_insn "@aarch64_sve_add_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 2 "register_operand")
[(match_operand:<VNARROW> 3 "register_operand")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_ADD_BINARY_LONG_LANE)
(match_operand:SVE_FULL_SDI 1 "register_operand")))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , <sve_lane_con> ; * ] <sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
[ ?&w , w , w , <sve_lane_con> ; yes ] movprfx\t%0, %1\;<sve_int_add_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
;; Saturating MLA operations.
(define_insn "@aarch64_sve_qadd_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(ss_plus:SVE_FULL_HSDI
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 2 "register_operand")
(match_operand:<VNARROW> 3 "register_operand")]
SVE2_INT_QADD_BINARY_LONG)
(match_operand:SVE_FULL_HSDI 1 "register_operand")))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
;; Saturating MLA operations with lane select.
(define_insn "@aarch64_sve_qadd_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(ss_plus:SVE_FULL_SDI
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 2 "register_operand")
[(match_operand:<VNARROW> 3 "register_operand")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_QADD_BINARY_LONG_LANE)
(match_operand:SVE_FULL_SDI 1 "register_operand")))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , <sve_lane_con> ; * ] <sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
[ ?&w , w , w , <sve_lane_con> ; yes ] movprfx\t%0, %1\;<sve_int_qadd_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
;; Non-saturating MLS operations.
(define_insn "@aarch64_sve_sub_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(minus:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 1 "register_operand")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 2 "register_operand")
(match_operand:<VNARROW> 3 "register_operand")]
SVE2_INT_SUB_BINARY_LONG)))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
;; Non-saturating MLS operations with lane select.
(define_insn "@aarch64_sve_sub_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(match_operand:SVE_FULL_SDI 1 "register_operand")
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 2 "register_operand")
[(match_operand:<VNARROW> 3 "register_operand")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_SUB_BINARY_LONG_LANE)))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , <sve_lane_con> ; * ] <sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
[ ?&w , w , w , <sve_lane_con> ; yes ] movprfx\t%0, %1\;<sve_int_sub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
;; Saturating MLS operations.
(define_insn "@aarch64_sve_qsub_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(ss_minus:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 1 "register_operand")
(unspec:SVE_FULL_HSDI
[(match_operand:<VNARROW> 2 "register_operand")
(match_operand:<VNARROW> 3 "register_operand")]
SVE2_INT_QSUB_BINARY_LONG)))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>
;; Saturating MLS operations with lane select.
(define_insn "@aarch64_sve_qsub_<sve_int_op>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(ss_minus:SVE_FULL_SDI
(match_operand:SVE_FULL_SDI 1 "register_operand")
(unspec:SVE_FULL_SDI
[(match_operand:<VNARROW> 2 "register_operand")
[(match_operand:<VNARROW> 3 "register_operand")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
SVE2_INT_QSUB_BINARY_LONG_LANE)))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , <sve_lane_con> ; * ] <sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
[ ?&w , w , w , <sve_lane_con> ; yes ] movprfx\t%0, %1\;<sve_int_qsub_op>\t%0.<Vetype>, %2.<Ventype>, %3.<Ventype>[%4]
;; Two-way dot-product.
(define_insn "<sur>dot_prodvnx4sivnx8hi"
[(set (match_operand:VNx4SI 0 "register_operand")
[(match_operand:VNx8HI 1 "register_operand")
(match_operand:VNx8HI 2 "register_operand")]
(match_operand:VNx4SI 3 "register_operand")))]
"TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , w , w , 0 ; * ] <sur>dot\t%0.s, %1.h, %2.h
[ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sur>dot\t%0.s, %1.h, %2.h
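;; Per 32-bit lane, the two-way dot product multiplies a pair of adjacent
;; 16-bit elements and accumulates the products (illustrative C, signed
;; variant; the FP16 FDOT form later in the file is analogous):
;;
;;   int32_t sdot2 (int32_t acc, const int16_t a[2], const int16_t b[2])
;;   {
;;     return acc + a[0] * b[0] + a[1] * b[1];
;;   }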
;; -------------------------------------------------------------------------
;; ---- [FP] Multi-register operations
;; -------------------------------------------------------------------------
;; Includes the multi-register forms of:
;; - FMAX
;; - FMAXNM
;; - FMIN
;; - FMINNM
;; -------------------------------------------------------------------------
(define_expand "@aarch64_sve_<maxmin_uns_op><mode>"
[(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
[(match_operand:SVE_Fx24 1 "aligned_register_operand" "Uw<vector_count>")
(match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")]
SVE_FP_BINARY_MULTI))]
"TARGET_STREAMING_SME2"
(define_insn "*aarch64_sve_<maxmin_uns_op><mode>"
[(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
[(match_operand:SVE_Fx24 1 "aligned_register_operand" "%0")
(match_operand:SVE_Fx24 2 "aligned_register_operand" "Uw<vector_count>")]
SVE_FP_BINARY_MULTI))]
"TARGET_STREAMING_SME2"
"<b><maxmin_uns_op>\t%0, %0, %2"
(define_insn "@aarch64_sve_single_<maxmin_uns_op><mode>"
[(set (match_operand:SVE_Fx24 0 "aligned_register_operand" "=Uw<vector_count>")
[(match_operand:SVE_Fx24 1 "aligned_register_operand" "0")
(vec_duplicate:SVE_Fx24
(match_operand:<VSINGLE> 2 "register_operand" "x"))]
SVE_FP_BINARY_MULTI))]
"TARGET_STREAMING_SME2"
"<b><maxmin_uns_op>\t%0, %0, %2.<Vetype>"
;; -------------------------------------------------------------------------
;; ---- [FP] Long multiplication with accumulation
;; -------------------------------------------------------------------------
;; - FDOT (SME2 or SVE2p1)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_fp_op><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
[(match_operand:<VNARROW> 1 "register_operand")
(match_operand:<VNARROW> 2 "register_operand")
(match_operand:VNx4SF_ONLY 3 "register_operand")]
SVE2_FP_TERNARY_LONG))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , w , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
[ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>
(define_insn "@aarch64_<sve_fp_op>_lane_<mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
[(match_operand:<VNARROW> 1 "register_operand")
[(match_operand:<VNARROW> 2 "register_operand")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)
(match_operand:VNx4SF_ONLY 4 "register_operand")]
SVE2_FP_TERNARY_LONG_LANE))]
{@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
[ w , w , <sve_lane_con> , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
[ ?&w , w , <sve_lane_con> , w ; yes ] movprfx\t%0, %4\;<sve_fp_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>[%3]
;; Two-way dot-product.
(define_insn "aarch64_sve_fdotvnx4sfvnx8hf"
[(set (match_operand:VNx4SF 0 "register_operand")
[(match_operand:VNx8HF 1 "register_operand")
(match_operand:VNx8HF 2 "register_operand")]
(match_operand:VNx4SF 3 "register_operand")))]
"TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , w , w , 0 ; * ] fdot\t%0.s, %1.h, %2.h
[ ?&w , w , w , w ; yes ] movprfx\t%0, %3\;fdot\t%0.s, %1.h, %2.h
(define_insn "aarch64_fdot_prod_lanevnx4sfvnx8hf"
[(set (match_operand:VNx4SF 0 "register_operand")
[(match_operand:VNx8HF 1 "register_operand")
[(match_operand:VNx8HF 2 "register_operand")
(match_operand:SI 3 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
(match_operand:VNx4SF 4 "register_operand")))]
"TARGET_SVE2p1_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 4 ; attrs: movprfx ]
[ w , w , y , 0 ; * ] fdot\t%0.s, %1.h, %2.h[%3]
[ ?&w , w , y , w ; yes ] movprfx\t%0, %4\;fdot\t%0.s, %1.h, %2.h[%3]
;; =========================================================================
;; == Narrowing arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing unary arithmetic
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")]
SVE2_INT_UNARY_NARROWB))]
"<sve_int_op>\t%0.<Ventype>, %1.<Vetype>"
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:<VNARROW> 1 "register_operand" "0")
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
SVE2_INT_UNARY_NARROWT))]
"<sve_int_op>\t%0.<Ventype>, %2.<Vetype>"
;; -------------------------------------------------------------------------
;; ---- [INT] Multi-vector narrowing unary arithmetic
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<optab><VNx16QI_ONLY:mode><VNx16SI_ONLY:mode>"
[(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
(unspec:VNx16QI_ONLY
[(match_operand:VNx16SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
"TARGET_STREAMING_SME2"
(define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8SI_ONLY:mode>"
[(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w")
[(match_operand:VNx8SI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
(define_insn "@aarch64_sve_<optab><VNx8HI_ONLY:mode><VNx8DI_ONLY:mode>"
[(set (match_operand:VNx8HI_ONLY 0 "register_operand" "=w")
[(match_operand:VNx8DI_ONLY 1 "aligned_register_operand" "Uw<vector_count>")]
"TARGET_STREAMING_SME2"
;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing binary arithmetic
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")]
SVE2_INT_BINARY_NARROWB))]
"<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, %2.<Vetype>"
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:<VNARROW> 1 "register_operand" "0")
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
(match_operand:SVE_FULL_HSDI 3 "register_operand" "w")]
SVE2_INT_BINARY_NARROWT))]
"<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
;; Optimize ((a + b) >> n) where n is half the bitsize of the vector element
(define_insn "*bitmask_shift_plus<mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand" "=w")
(unspec:SVE_FULL_HSDI
[(match_operand:<VPRED> 1)
(lshiftrt:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
(match_operand:SVE_FULL_HSDI 3 "register_operand" "w"))
(match_operand:SVE_FULL_HSDI 4
"aarch64_simd_shift_imm_vec_exact_top" ""))]
"addhnb\t%0.<Ventype>, %2.<Vetype>, %3.<Vetype>"
;; -------------------------------------------------------------------------
;; ---- [INT] Narrowing right shifts
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; The immediate range is enforced before generating the instruction.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:SVE_FULL_HSDI 1 "register_operand" "w")
(match_operand:DI 2 "const_int_operand")]
SVE2_INT_SHIFT_IMM_NARROWB))]
"<sve_int_op>\t%0.<Ventype>, %1.<Vetype>, #%2"
;; The immediate range is enforced before generating the instruction.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:<VNARROW> 1 "register_operand" "0")
(match_operand:SVE_FULL_HSDI 2 "register_operand" "w")
(match_operand:DI 3 "const_int_operand")]
SVE2_INT_SHIFT_IMM_NARROWT))]
"<sve_int_op>\t%0.<Ventype>, %2.<Vetype>, #%3"
;; -------------------------------------------------------------------------
;; ---- [INT] Multi-vector narrowing right shifts
;; -------------------------------------------------------------------------
;; - SQRSHRN (SVE2p1, SME2)
;; - SQRSHRUN (SVE2p1, SME2)
;; - UQRSHRN (SVE2p1, SME2)
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<sve_int_op><mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:SVE_FULL_SIx2_SDIx4 1 "register_operand" "Uw<vector_count>")
(match_operand:DI 2 "const_int_operand")]
SVE2_INT_SHIFT_IMM_NARROWxN))]
"(<MODE>mode == VNx8SImode || TARGET_STREAMING_SME2)"
"<sve_int_op>\t%0.<Ventype>, %1, #%2"
;; =========================================================================
;; == Pairwise arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_pred_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand")]
SVE2_INT_BINARY_PAIR))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
;; -------------------------------------------------------------------------
;; ---- [FP] Pairwise arithmetic
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
[(set (match_operand:SVE_FULL_F 0 "register_operand")
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SVE_FULL_F 2 "register_operand")
(match_operand:SVE_FULL_F 3 "register_operand")]
SVE2_FP_BINARY_PAIR))]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , 0 , w ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
;; -------------------------------------------------------------------------
;; ---- [INT] Pairwise arithmetic with accumulation
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; Predicated pairwise absolute difference and accumulate with merging.
(define_expand "@cond_<sve_int_op><mode>"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(unspec:SVE_FULL_HSDI
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 2 "register_operand")
(match_operand:<VNARROW> 3 "register_operand")]
SVE2_INT_BINARY_PAIR_LONG)
(match_operand:SVE_FULL_HSDI 4 "aarch64_simd_reg_or_zero")]
/* Only target code is aware of these operations, so we don't need
to handle the fully-general case. */
gcc_assert (rtx_equal_p (operands[2], operands[4])
|| CONSTANT_P (operands[4]));
;; Predicated pairwise absolute difference and accumulate, merging with
;; the first input.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_2"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(unspec:SVE_FULL_HSDI
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 2 "register_operand")
(match_operand:<VNARROW> 3 "register_operand")]
SVE2_INT_BINARY_PAIR_LONG)
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
"&& !CONSTANT_P (operands[4])"
operands[4] = CONSTM1_RTX (<VPRED>mode);
;; Predicated pairwise absolute difference and accumulate, merging with zero.
(define_insn_and_rewrite "*cond_<sve_int_op><mode>_z"
[(set (match_operand:SVE_FULL_HSDI 0 "register_operand")
(unspec:SVE_FULL_HSDI
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_HSDI
(match_operand:SVE_FULL_HSDI 2 "register_operand")
(match_operand:<VNARROW> 3 "register_operand")]
SVE2_INT_BINARY_PAIR_LONG)
(match_operand:SVE_FULL_HSDI 4 "aarch64_simd_imm_zero")]
{@ [ cons: =0 , 1 , 2 , 3 ]
[ &w , Upl , 0 , w ] movprfx\t%0.<Vetype>, %1/z, %0.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
[ &w , Upl , w , w ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %3.<Ventype>
"&& !CONSTANT_P (operands[5])"
operands[5] = CONSTM1_RTX (<VPRED>mode);
[(set_attr "movprfx" "yes")]
;; -------------------------------------------------------------------------
;; ---- [FP] Absolute maximum and minimum
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; Predicated floating-point absolute maximum and minimum.
(define_insn_and_rewrite "*aarch64_pred_faminmax_fused"
[(set (match_operand:SVE_FULL_F 0 "register_operand")
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 4 "aarch64_sve_gp_strictness")
(const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 2 "register_operand")]
(const_int SVE_RELAXED_GP)
(match_operand:SVE_FULL_F 3 "register_operand")]
"TARGET_FAMINMAX && TARGET_SVE2_OR_SME2"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , %0 , w ; * ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
"&& (!rtx_equal_p (operands[1], operands[5])
|| !rtx_equal_p (operands[1], operands[6]))"
operands[5] = copy_rtx (operands[1]);
operands[6] = copy_rtx (operands[1]);
;; =========================================================================
;; == Complex arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Complex binary operations
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<optab><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
[(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")]
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , 0 , w ; * ] <sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
[ ?&w , w , w ; yes ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #<rot>
;; unpredicated optab pattern for auto-vectorizer
(define_expand "cadd<rot><mode>3"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
[(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")]
;; -------------------------------------------------------------------------
;; ---- [INT] Complex ternary operations
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<optab><mode>"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
[(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand")]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>, #<rot>
(define_insn "@aarch64_<optab>_lane_<mode>"
[(set (match_operand:SVE_FULL_HSI 0 "register_operand")
(unspec:SVE_FULL_HSI
[(match_operand:SVE_FULL_HSI 1 "register_operand")
(match_operand:SVE_FULL_HSI 2 "register_operand")
(unspec:SVE_FULL_HSI
[(match_operand:SVE_FULL_HSI 3 "register_operand")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , <sve_lane_con> ; * ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
[ ?&w , w , w , <sve_lane_con> ; yes ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>[%4], #<rot>
;; unpredicated optab pattern for auto-vectorizer
;; The complex mla/mls operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder. Because of this, expand early.
(define_expand "cml<fcmac1><conj_op><mode>4"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
(plus:SVE_FULL_I (match_operand:SVE_FULL_I 1 "register_operand")
[(match_operand:SVE_FULL_I 2 "register_operand")
(match_operand:SVE_FULL_I 3 "register_operand")]
SVE2_INT_CMLA_OP)))]
rtx tmp = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, operands[1],
operands[3], operands[2]));
emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
operands[3], operands[2]));
;; unpredicated optab pattern for auto-vectorizer
;; The complex mul operations always need to expand to two instructions.
;; The first operation does half the computation and the second does the
;; remainder. Because of this, expand early.
(define_expand "cmul<conj_op><mode>3"
[(set (match_operand:SVE_FULL_I 0 "register_operand")
[(match_operand:SVE_FULL_I 1 "register_operand")
(match_operand:SVE_FULL_I 2 "register_operand")]
rtx accum = force_reg (<MODE>mode, CONST0_RTX (<MODE>mode));
rtx tmp = gen_reg_rtx (<MODE>mode);
emit_insn (gen_aarch64_sve_cmla<sve_rot1><mode> (tmp, accum,
operands[2], operands[1]));
emit_insn (gen_aarch64_sve_cmla<sve_rot2><mode> (operands[0], tmp,
operands[2], operands[1]));
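;; Roughly, the two rotations split a complex multiply-accumulate as below
;; (illustrative C for one (re, im) element pair, following the usual CMLA
;; definition; the conjugating forms negate the relevant products):
;;
;;   /* First instruction, rotation #0: products of a's real part.  */
;;   acc_re += a_re * b_re;
;;   acc_im += a_re * b_im;
;;   /* Second instruction, rotation #90: products of a's imaginary part.  */
;;   acc_re -= a_im * b_im;
;;   acc_im += a_im * b_re;
;;
;; Together these compute acc += a * b over the complex numbers.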
;; -------------------------------------------------------------------------
;; ---- [INT] Complex dot product
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
(define_insn "@aarch64_sve_<optab><mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(unspec:SVE_FULL_SDI
[(match_operand:SVE_FULL_SDI 1 "register_operand")
(match_operand:<VSI2QI> 2 "register_operand")
(match_operand:<VSI2QI> 3 "register_operand")]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , w ; * ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
[ ?&w , w , w , w ; yes ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>, #<rot>
(define_insn "@aarch64_<optab>_lane_<mode>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(unspec:SVE_FULL_SDI
[(match_operand:SVE_FULL_SDI 1 "register_operand")
(match_operand:<VSI2QI> 2 "register_operand")
[(match_operand:<VSI2QI> 3 "register_operand")
(match_operand:SI 4 "const_int_operand")]
UNSPEC_SVE_LANE_SELECT)]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , 0 , w , <sve_lane_con> ; * ] <sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
[ ?&w , w , w , <sve_lane_con> ; yes ] movprfx\t%0, %1\;<sve_int_op>\t%0.<Vetype>, %2.<Vetype_fourth>, %3.<Vetype_fourth>[%4], #<rot>
;; =========================================================================
;; == Conversions
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Widening conversions
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; Predicated convert long top.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
(match_operand:<VNARROW> 2 "register_operand" "0")]
SVE2_COND_FP_UNARY_LONG))]
"<sve_fp_op>\t%0.<Vetype>, %1/m, %0.<Ventype>"
;; Predicated convert long top with merging.
(define_expand "@cond_<sve_fp_op><mode>"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand")
(unspec:SVE_FULL_SDF
(const_int SVE_STRICT_GP)
(match_operand:<VNARROW> 2 "register_operand")]
SVE2_COND_FP_UNARY_LONG)
(match_operand:SVE_FULL_SDF 3 "register_operand")]
;; These instructions do not take MOVPRFX.
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_relaxed"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(unspec:SVE_FULL_SDF
(const_int SVE_RELAXED_GP)
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_COND_FP_UNARY_LONG)
(match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
"<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
"&& !rtx_equal_p (operands[1], operands[4])"
operands[4] = copy_rtx (operands[1]);
(define_insn "*cond_<sve_fp_op><mode>_strict"
[(set (match_operand:SVE_FULL_SDF 0 "register_operand" "=w")
(unspec:SVE_FULL_SDF
[(match_operand:<VPRED> 1 "register_operand" "Upl")
(unspec:SVE_FULL_SDF
(const_int SVE_STRICT_GP)
(match_operand:<VNARROW> 2 "register_operand" "w")]
SVE2_COND_FP_UNARY_LONG)
(match_operand:SVE_FULL_SDF 3 "register_operand" "0")]
"<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Ventype>"
(define_insn "@aarch64_sve2_fp8_cvt_<fp8_cvt_uns_op><mode>"
[(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
[(match_operand:VNx16QI 1 "register_operand" "w")
(reg:DI FPM_REGNUM)]
"<b><fp8_cvt_uns_op>\t%0.h, %1.b"
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Narrowing conversions
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; Predicated FCVTNT. This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_cvtnt<mode>"
[(set (match_operand:SVE_FULL_HSF 0 "register_operand" "=w")
(unspec:SVE_FULL_HSF
[(match_operand:<VWIDE_PRED> 2 "register_operand" "Upl")
(const_int SVE_STRICT_GP)
(match_operand:SVE_FULL_HSF 1 "register_operand" "0")
(match_operand:<VWIDE> 3 "register_operand" "w")]
UNSPEC_COND_FCVTNT))]
"fcvtnt\t%0.<Vetype>, %2/m, %3.<Vewtype>"
;; Predicated FCVTX (equivalent to what would be FCVTXNB, except that
;; it supports MOVPRFX).
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
[(match_operand:<VWIDE_PRED> 1 "register_operand")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
(match_operand:<VWIDE> 2 "register_operand")]
SVE2_COND_FP_UNARY_NARROWB))]
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
[ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
;; Predicated FCVTX with merging.
(define_expand "@cond_<sve_fp_op><mode>"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
[(match_operand:<VWIDE_PRED> 1 "register_operand")
(const_int SVE_STRICT_GP)
(match_operand:<VWIDE> 2 "register_operand")]
SVE2_COND_FP_UNARY_NARROWB)
(match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
(define_insn_and_rewrite "*cond_<sve_fp_op><mode>_any_relaxed"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
[(match_operand:<VWIDE_PRED> 1 "register_operand")
(const_int SVE_RELAXED_GP)
(match_operand:<VWIDE> 2 "register_operand")]
SVE2_COND_FP_UNARY_NARROWB)
(match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
"TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
[ &w , Upl , w , Dz ; yes ] movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
[ &w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
"&& !rtx_equal_p (operands[1], operands[4])"
operands[4] = copy_rtx (operands[1]);
(define_insn "*cond_<sve_fp_op><mode>_any_strict"
[(set (match_operand:VNx4SF_ONLY 0 "register_operand")
[(match_operand:<VWIDE_PRED> 1 "register_operand")
(const_int SVE_STRICT_GP)
(match_operand:<VWIDE> 2 "register_operand")]
SVE2_COND_FP_UNARY_NARROWB)
(match_operand:VNx4SF_ONLY 3 "aarch64_simd_reg_or_zero")]
"TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ &w , Upl , w , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
[ &w , Upl , w , Dz ; yes ] movprfx\t%0.<Vewtype>, %1/z, %2.<Vewtype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
[ &w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vewtype>
;; Predicated FCVTXNT. This doesn't give a natural aarch64_pred_*/cond_*
;; pair because the even elements always have to be supplied for active
;; elements, even if the inactive elements don't matter.
;; These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve2_cvtxnt<mode>"
[(set (match_operand:<VNARROW> 0 "register_operand" "=w")
[(match_operand:<VPRED> 2 "register_operand" "Upl")
(const_int SVE_STRICT_GP)
(match_operand:<VNARROW> 1 "register_operand" "0")
(match_operand:VNx2DF_ONLY 3 "register_operand" "w")]
UNSPEC_COND_FCVTXNT))]
"fcvtxnt\t%0.<Ventype>, %2/m, %3.<Vetype>"
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Multi-vector widening conversions
;; -------------------------------------------------------------------------
;; Includes the multi-register forms of:
;; - FCVT (SME_F16F16)
;; - FCVTL (SME_F16F16)
;; -------------------------------------------------------------------------
(define_insn "extendvnx8hfvnx8sf2"
[(set (match_operand:VNx8SF 0 "aligned_register_operand" "=Uw2")
(float_extend:VNx8SF
(match_operand:VNx8HF 1 "register_operand" "w")))]
"TARGET_STREAMING_SME_F16F16"
(define_insn "@aarch64_sve_cvtl<mode>"
[(set (match_operand:VNx8SF_ONLY 0 "aligned_register_operand" "=Uw2")
[(match_operand:VNx8HF 1 "register_operand" "w")]
"TARGET_STREAMING_SME_F16F16"
;; -------------------------------------------------------------------------
;; ---- [FP<-FP] Multi-vector narrowing conversions
;; -------------------------------------------------------------------------
;; Includes the multi-register forms of:
;; - BFCVT (SME2)
;; - BFCVTN (SME2)
;; - FCVT (SME2)
;; - FCVTN (SME2)
;; -------------------------------------------------------------------------
(define_insn "truncvnx8sf<mode>2"
[(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
(float_truncate:SVE_FULL_HF
(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")))]
"TARGET_STREAMING_SME2"
(define_insn "@aarch64_sve_cvtn<mode>"
[(set (match_operand:SVE_FULL_HF 0 "register_operand" "=w")
[(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")]
"TARGET_STREAMING_SME2"
"<b>fcvtn\t%0.h, %1"
(define_insn "@aarch64_sve2_fp8_cvtn<mode>"
[(set (match_operand:VNx16QI 0 "register_operand" "=w")
[(match_operand:SVE_FULL_HFx2 1 "aligned_register_operand" "Uw2")
(reg:DI FPM_REGNUM)]
"<b>fcvtn\t%0.b, %1"
(define_insn "@aarch64_sve2_fp8_cvtnb<mode>"
[(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
(unspec:VNx16QI_ONLY
[(match_operand:VNx8SF 1 "aligned_register_operand" "Uw2")
(reg:DI FPM_REGNUM)]
(define_insn "@aarch64_sve_cvtnt<mode>"
[(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
(unspec:VNx16QI_ONLY
[(match_operand:VNx16QI_ONLY 1 "register_operand" "0")
(match_operand:VNx8SF 2 "aligned_register_operand" "Uw2")
(reg:DI FPM_REGNUM)]
;; -------------------------------------------------------------------------
;; ---- [FP<-INT] Multi-vector conversions
;; -------------------------------------------------------------------------
;; Includes the multi-register forms of:
;; - SCVTF (SME2)
;; - UCVTF (SME2)
;; -------------------------------------------------------------------------
(define_insn "<optab><v_int_equiv><mode>2"
[(set (match_operand:SVE_SFx24 0 "aligned_register_operand" "=Uw<vector_count>")
(FLOATUORS:SVE_SFx24
(match_operand:<V_INT_EQUIV> 1 "aligned_register_operand" "Uw<vector_count>")))]
"TARGET_STREAMING_SME2"
"<su_optab>cvtf\t%0, %1"
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Multi-vector conversions
;; -------------------------------------------------------------------------
;; Includes the multi-register forms of:
;; - FCVTZS (SME2)
;; - FCVTZU (SME2)
;; -------------------------------------------------------------------------
(define_insn "<optab><mode><v_int_equiv>2"
[(set (match_operand:<V_INT_EQUIV> 0 "aligned_register_operand" "=Uw<vector_count>")
(FIXUORS:<V_INT_EQUIV>
(match_operand:SVE_SFx24 1 "aligned_register_operand" "Uw<vector_count>")))]
"TARGET_STREAMING_SME2"
;; =========================================================================
;; == Other arithmetic
;; =========================================================================
;; -------------------------------------------------------------------------
;; ---- [INT] Reciprocal approximation
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; Predicated integer unary operations.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand")
[(match_operand:<VPRED> 1 "register_operand")
[(match_operand:VNx4SI_ONLY 2 "register_operand")]
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , Upl , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
[ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
;; Predicated integer unary operations with merging.
(define_expand "@cond_<sve_int_op><mode>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand")
[(match_operand:<VPRED> 1 "register_operand")
[(match_operand:VNx4SI_ONLY 2 "register_operand")]
(match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
operands[4] = CONSTM1_RTX (<MODE>mode);
(define_insn_and_rewrite "*cond_<sve_int_op><mode>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand")
[(match_operand:<VPRED> 1 "register_operand")
[(match_operand:VNx4SI_ONLY 2 "register_operand")]
(match_operand:VNx4SI_ONLY 3 "aarch64_simd_reg_or_zero")]
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
[ w , Upl , w , 0 ; * ] <sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
[ ?&w , Upl , w , Dz ; yes ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
[ ?&w , Upl , w , w ; yes ] movprfx\t%0, %3\;<sve_int_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
"&& !CONSTANT_P (operands[4])"
operands[4] = CONSTM1_RTX (<VPRED>mode);
;; -------------------------------------------------------------------------
;; ---- [INT<-FP] Base-2 logarithm
;; -------------------------------------------------------------------------
;; -------------------------------------------------------------------------
;; Predicated FLOGB.
(define_insn "@aarch64_pred_<sve_fp_op><mode>"
[(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
(unspec:<V_INT_EQUIV>
[(match_operand:<VPRED> 1 "register_operand")
(match_operand:SI 3 "aarch64_sve_gp_strictness")
(match_operand:SVE_FULL_F 2 "register_operand")]
SVE2_COND_INT_UNARY_FP))]
{@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
[ w , Upl , 0 ; * ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
[ ?&w , Upl , w ; yes ] movprfx\t%0, %2\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>

;; Predicated FLOGB with merging.
(define_expand "@cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:<V_INT_EQUIV>
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2"
)

(define_insn_and_rewrite "*cond_<sve_fp_op><mode>"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:<V_INT_EQUIV>
             [(match_operand 4)
              (const_int SVE_RELAXED_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ &w       , Upl , w , 0  ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
     [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
  }
  "&& !rtx_equal_p (operands[1], operands[4])"
  {
    operands[4] = copy_rtx (operands[1]);
  }
)

(define_insn "*cond_<sve_fp_op><mode>_strict"
  [(set (match_operand:<V_INT_EQUIV> 0 "register_operand")
        (unspec:<V_INT_EQUIV>
          [(match_operand:<VPRED> 1 "register_operand")
           (unspec:<V_INT_EQUIV>
             [(match_dup 1)
              (const_int SVE_STRICT_GP)
              (match_operand:SVE_FULL_F 2 "register_operand")]
             SVE2_COND_INT_UNARY_FP)
           (match_operand:<V_INT_EQUIV> 3 "aarch64_simd_reg_or_zero")]
          UNSPEC_SEL))]
  "TARGET_SVE2 && !rtx_equal_p (operands[2], operands[3])"
  {@ [ cons: =0 , 1   , 2 , 3  ; attrs: movprfx ]
     [ &w       , Upl , w , 0  ; *              ] <sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
     [ ?&w      , Upl , w , Dz ; yes            ] movprfx\t%0.<Vetype>, %1/z, %2.<Vetype>\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
     [ ?&w      , Upl , w , w  ; yes            ] movprfx\t%0, %3\;<sve_fp_op>\t%0.<Vetype>, %1/m, %2.<Vetype>
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT] Polynomial multiplication
;; -------------------------------------------------------------------------
;; Includes:
;; - PMUL
;; - PMULLB
;; - PMULLT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve2_pmul<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
        (unspec:VNx16QI_ONLY
          [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
           (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
          UNSPEC_PMUL))]
  "TARGET_SVE2"
  "pmul\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; Extending PMUL, with the results modeled as wider vectors.
;; This representation is only possible for .H and .D, not .Q.
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULL_HDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_HDI
          [(match_operand:<VNARROW> 1 "register_operand" "w")
           (match_operand:<VNARROW> 2 "register_operand" "w")]
          SVE2_PMULL))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vetype>, %1.<Ventype>, %2.<Ventype>"
)

;; Extending PMUL, with the results modeled as pairs of values.
;; This representation works for .H, .D and .Q, with .Q requiring
;; the AES extension.  (This is enforced by the mode iterator.)
(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE2_PMULL_PAIR_I 0 "register_operand" "=w")
        (unspec:SVE2_PMULL_PAIR_I
          [(match_operand:SVE2_PMULL_PAIR_I 1 "register_operand" "w")
           (match_operand:SVE2_PMULL_PAIR_I 2 "register_operand" "w")]
          SVE2_PMULL_PAIR))]
  "TARGET_SVE2"
  "<sve_int_op>\t%0.<Vewtype>, %1.<Vetype>, %2.<Vetype>"
)

;; =========================================================================
;; == Comparisons and selects
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Select based on predicates as counters
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_sel<mode>"
  [(set (match_operand:SVE_FULLx24 0 "register_operand" "=Uw<vector_count>")
        (unspec:SVE_FULLx24
          [(match_operand:<VPRED> 3 "register_operand" "Uph")
           (match_operand:SVE_FULLx24 1 "aligned_register_operand" "Uw<vector_count>")
           (match_operand:SVE_FULLx24 2 "aligned_register_operand" "Uw<vector_count>")]
          UNSPEC_SEL))]
  "TARGET_STREAMING_SME2"
  "sel\t%0, %K3, %1, %2"
)

;; -------------------------------------------------------------------------
;; ---- [INT] While tests
;; -------------------------------------------------------------------------
;; Includes the x2 and count versions of:
;; - WHILEGE
;; - WHILEGT
;; - WHILEHI
;; - WHILEHS
;; - WHILELE
;; - WHILELO
;; - WHILELS
;; - WHILELT
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_while<while_optab_cmp>_b<BHSD_BITS>_x2"
  [(set (match_operand:VNx32BI 0 "register_operand" "=Up2")
        (unspec:VNx32BI
          [(const_int SVE_WHILE_B_X2)
           (match_operand:DI 1 "aarch64_reg_or_zero" "rZ")
           (match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
           (const_int BHSD_BITS)]
          SVE_WHILE_ORDER))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE2p1_OR_SME2"
  "while<cmp_op>\t{%S0.<bits_etype>, %T0.<bits_etype>}, %x1, %x2"
)

(define_insn "@aarch64_sve_while<while_optab_cmp>_c<BHSD_BITS>"
  [(set (match_operand:VNx16BI 0 "register_operand" "=Uph")
        (unspec:VNx16BI
          [(const_int SVE_WHILE_C)
           (match_operand:DI 1 "aarch64_reg_or_zero" "rZ")
           (match_operand:DI 2 "aarch64_reg_or_zero" "rZ")
           (const_int BHSD_BITS)
           (match_operand:DI 3 "const_int_operand")]
          SVE_WHILE_ORDER))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE2p1_OR_SME2"
  "while<cmp_op>\t%K0.<bits_etype>, %x1, %x2, vlx%3"
)

;; =========================================================================
;; == Reductions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT] Reduction to 128-bit vector
;; -------------------------------------------------------------------------
;; Includes:
;; - ADDQV (SVE2p1)
;; - ANDQV (SVE2p1)
;; - EORQV (SVE2p1)
;; - ORQV (SVE2p1)
;; - SMAXQV (SVE2p1)
;; - SMINQV (SVE2p1)
;; - UMAXQV (SVE2p1)
;; - UMINQV (SVE2p1)
;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:<V128> 0 "register_operand" "=w")
        (unspec:<V128>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")]
          SVE_INT_REDUCTION_128))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "<optab>\t%0.<Vtype>, %1, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [FP] Reduction to 128-bit vector
;; -------------------------------------------------------------------------
;; Includes:
;; - FADDQV (SVE2p1)
;; - FMAXNMQV (SVE2p1)
;; - FMAXQV (SVE2p1)
;; - FMINNMQV (SVE2p1)
;; - FMINQV (SVE2p1)
;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_reduc_<optab>_<mode>"
  [(set (match_operand:<V128> 0 "register_operand" "=w")
        (unspec:<V128>
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_FULL_F 2 "register_operand" "w")]
          SVE_FP_REDUCTION_128))]
  "TARGET_SVE2p1 && TARGET_NON_STREAMING"
  "<optab>\t%0.<Vtype>, %1, %2.<Vetype>"
)

;; =========================================================================
;; == Permutation
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Reversal
;; -------------------------------------------------------------------------
;; Includes:
;; - REVD
;; -------------------------------------------------------------------------

(define_insn "@aarch64_pred_<optab><mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
        (unspec:SVE_FULL
          [(match_operand:VNx2BI 1 "register_operand")
           (unspec:SVE_FULL
             [(match_operand:SVE_FULL 2 "register_operand")]
             UNSPEC_REVD_ONLY)]
          UNSPEC_PRED_X))]
  "TARGET_SVE2p1_OR_SME"
  {@ [ cons: =0 , 1   , 2 ; attrs: movprfx ]
     [ w        , Upl , 0 ; *              ] revd\t%0.q, %1/m, %2.q
     [ ?&w      , Upl , w ; yes            ] movprfx\t%0, %2\;revd\t%0.q, %1/m, %2.q
  }
)

(define_insn "@cond_<optab><mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand")
        (unspec:SVE_FULL
          [(match_operand:VNx2BI 1 "register_operand")
           (unspec:SVE_FULL
             [(match_operand:SVE_FULL 2 "register_operand")]
             UNSPEC_REVD_ONLY)
           (match_operand:SVE_FULL 3 "register_operand")]
          UNSPEC_SEL))]
  "TARGET_SVE2p1_OR_SME"
  {@ [ cons: =0 , 1   , 2 , 3 ; attrs: movprfx ]
     [ w        , Upl , w , 0 ; *              ] revd\t%0.q, %1/m, %2.q
     [ ?&w      , Upl , w , w ; yes            ] movprfx\t%0, %3\;revd\t%0.q, %1/m, %2.q
  }
)

;; -------------------------------------------------------------------------
;; ---- [INT,FP] HVLA permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - DUPQ
;; - EXTQ
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_dupq<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:SVE_FULL 1 "register_operand" "w")
           (match_operand:SI 2 "const_int_operand")]
          UNSPEC_DUPQ))]
  "TARGET_SVE2p1
   && TARGET_NON_STREAMING
   && IN_RANGE (INTVAL (operands[2]) * (<elem_bits> / 8), 0, 15)"
  "dupq\t%0.<Vetype>, %1.<Vetype>[%2]"
)

(define_insn "@aarch64_sve_extq<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w, ?&w")
        (unspec:SVE_FULL
          [(match_operand:SVE_FULL 1 "register_operand" "0, w")
           (match_operand:SVE_FULL 2 "register_operand" "w, w")
           (match_operand:SI 3 "const_int_operand")]
          UNSPEC_EXTQ))]
  "TARGET_SVE2p1
   && TARGET_NON_STREAMING
   && IN_RANGE (INTVAL (operands[3]) * (<elem_bits> / 8), 0, 15)"
  {
    operands[3] = GEN_INT (INTVAL (operands[3]) * (<elem_bits> / 8));
    return (which_alternative == 0
            ? "extq\\t%0.b, %0.b, %2.b, #%3"
            : "movprfx\t%0, %1\;extq\\t%0.b, %0.b, %2.b, #%3");
  }
  [(set_attr "movprfx" "*,yes")]
)
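
;; Operand 3 is likewise rescaled from an element index to the byte
;; offset that EXTQ expects: e.g. extracting from .h element 5 becomes
;; byte offset #10, because each .h element is two bytes wide.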

;; -------------------------------------------------------------------------
;; ---- [INT,FP] General permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - TBL (vector pair form)
;; - TBX
;; - TBXQ (SVE2p1)
;; -------------------------------------------------------------------------

;; TBL on a pair of data vectors.
(define_insn "@aarch64_sve2_tbl2<mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:<VDOUBLE> 1 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 2 "register_operand" "w")]
          UNSPEC_TBL2))]
  "TARGET_SVE2"
  "tbl\t%0.<Vetype>, %1, %2.<Vetype>"
)

;; TBX(Q).  These instructions do not take MOVPRFX.
(define_insn "@aarch64_sve_<perm_insn><mode>"
  [(set (match_operand:SVE_FULL 0 "register_operand" "=w")
        (unspec:SVE_FULL
          [(match_operand:SVE_FULL 1 "register_operand" "0")
           (match_operand:SVE_FULL 2 "register_operand" "w")
           (match_operand:<V_INT_EQUIV> 3 "register_operand" "w")]
          SVE_TBX))]
  "TARGET_SVE2"
  "<perm_insn>\t%0.<Vetype>, %2.<Vetype>, %3.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- [INT,FP] Multi-register permutes
;; -------------------------------------------------------------------------
;; Includes:
;; - UZP
;; - ZIP
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2")
        (unspec:SVE_FULLx2
          [(match_operand:<VSINGLE> 1 "register_operand" "w")
           (match_operand:<VSINGLE> 2 "register_operand" "w")]
          SVE2_x24_PERMUTE))]
  "TARGET_STREAMING_SME2"
  "<perm_insn>\t%0, %1.<Vetype>, %2.<Vetype>"
)

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULLx2 0 "aligned_register_operand" "=Uw2")
        (unspec:SVE_FULLx2
          [(match_operand:<VSINGLE> 1 "register_operand" "w")
           (match_operand:<VSINGLE> 2 "register_operand" "w")]
          SVE2_x24_PERMUTEQ))]
  "TARGET_STREAMING_SME2"
  "<perm_insn>\t{%S0.q - %T0.q}, %1.q, %2.q"
)

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4")
        (unspec:SVE_FULLx4
          [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")]
          SVE2_x24_PERMUTE))]
  "TARGET_STREAMING_SME2"
  "<perm_insn>\t%0, %1"
)

(define_insn "@aarch64_sve_<optab><mode>"
  [(set (match_operand:SVE_FULLx4 0 "aligned_register_operand" "=Uw4")
        (unspec:SVE_FULLx4
          [(match_operand:SVE_FULLx4 1 "aligned_register_operand" "Uw4")]
          SVE2_x24_PERMUTEQ))]
  "TARGET_STREAMING_SME2"
  "<perm_insn>\t{%S0.q - %V0.q}, {%S1.q - %V1.q}"
)

;; -------------------------------------------------------------------------
;; ---- [INT] Optional bit-permute extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - BDEP
;; - BEXT
;; - BGRP
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_<sve_int_op><mode>"
  [(set (match_operand:SVE_FULL_I 0 "register_operand" "=w")
        (unspec:SVE_FULL_I
          [(match_operand:SVE_FULL_I 1 "register_operand" "w")
           (match_operand:SVE_FULL_I 2 "register_operand" "w")]
          SVE2_INT_BITPERM))]
  "TARGET_SVE2_BITPERM"
  "<sve_int_op>\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; =========================================================================
;; == General
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Check for aliases between pointers
;; -------------------------------------------------------------------------
;; The patterns in this section are synthetic: WHILERW and WHILEWR are
;; defined in aarch64-sve.md instead.
;; -------------------------------------------------------------------------

;; Use WHILERW and WHILEWR to accelerate alias checks.  This is only
;; possible if the accesses we're checking are exactly the same size
;; as an SVE vector.
(define_expand "check_<raw_war>_ptrs<mode>"
  [(match_operand:GPI 0 "register_operand")
   (unspec:GPI
     [(match_operand:GPI 1 "register_operand")
      (match_operand:GPI 2 "register_operand")
      (match_operand:GPI 3 "aarch64_bytes_per_sve_vector_operand")
      (match_operand:GPI 4 "const_int_operand")]
     SVE2_WHILE_PTR)]
  "TARGET_SVE2 && TARGET_NON_STREAMING"
{
  /* Use the widest predicate mode we can.  */
  unsigned int align = INTVAL (operands[4]);
  if (align > 8)
    align = 8;
  machine_mode pred_mode = aarch64_sve_pred_mode (align).require ();

  /* Emit a WHILERW or WHILEWR, setting the condition codes based on
     the result.  */
  emit_insn (gen_while_ptest
             (<SVE2_WHILE_PTR:unspec>, <MODE>mode, pred_mode,
              gen_rtx_SCRATCH (pred_mode), operands[1], operands[2],
              CONSTM1_RTX (VNx16BImode), CONSTM1_RTX (pred_mode)));

  /* Set operand 0 to true if the last bit of the predicate result is set,
     i.e. if all elements are free of dependencies.  */
  rtx cc_reg = gen_rtx_REG (CC_NZCmode, CC_REGNUM);
  rtx cmp = gen_rtx_LTU (<MODE>mode, cc_reg, const0_rtx);
  emit_insn (gen_aarch64_cstore<mode> (operands[0], cmp, cc_reg));
  DONE;
})
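
;; As a hedged illustration (a sketch, not part of the original sources):
;; given a loop such as
;;
;;   void f (int *x, int *y, long n)
;;   {
;;     for (long i = 0; i < n; ++i)
;;       x[i] = y[i] + 1;
;;   }
;;
;; the vectorizer can use this expander to test at run time whether x and
;; y overlap within a vector's worth of elements, e.g. with a single
;;   whilewr p0.s, x0, x1
;; followed by a branch on the condition codes, instead of comparing the
;; pointer ranges with scalar arithmetic.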

;; -------------------------------------------------------------------------
;; ---- Histogram processing
;; -------------------------------------------------------------------------
;; Includes:
;; - HISTCNT
;; - HISTSEG
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve2_histcnt<mode>"
  [(set (match_operand:SVE_FULL_SDI 0 "register_operand" "=w")
        (unspec:SVE_FULL_SDI
          [(match_operand:<VPRED> 1 "register_operand" "Upl")
           (match_operand:SVE_FULL_SDI 2 "register_operand" "w")
           (match_operand:SVE_FULL_SDI 3 "register_operand" "w")]
          UNSPEC_HISTCNT))]
  "TARGET_SVE2 && TARGET_NON_STREAMING"
  "histcnt\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
)

(define_insn "@aarch64_sve2_histseg<mode>"
  [(set (match_operand:VNx16QI_ONLY 0 "register_operand" "=w")
        (unspec:VNx16QI_ONLY
          [(match_operand:VNx16QI_ONLY 1 "register_operand" "w")
           (match_operand:VNx16QI_ONLY 2 "register_operand" "w")]
          UNSPEC_HISTSEG))]
  "TARGET_SVE2 && TARGET_NON_STREAMING"
  "histseg\t%0.<Vetype>, %1.<Vetype>, %2.<Vetype>"
)

;; -------------------------------------------------------------------------
;; ---- String matching
;; -------------------------------------------------------------------------
;; Includes:
;; - MATCH
;; - NMATCH
;; -------------------------------------------------------------------------

;; Predicated string matching.
(define_insn "@aarch64_pred_<sve_int_op><mode>"
  [(set (match_operand:<VPRED> 0 "register_operand")
        (unspec:<VPRED>
          [(match_operand:<VPRED> 1 "register_operand")
           (match_operand:SI 2 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand:SVE_FULL_BHI 3 "register_operand")
              (match_operand:SVE_FULL_BHI 4 "register_operand")]
             SVE2_MATCH)]
          UNSPEC_PRED_Z))
   (clobber (reg:CC_NZC CC_REGNUM))]
  "TARGET_SVE2 && TARGET_NON_STREAMING"
  {@ [ cons: =0, 1  , 3, 4; attrs: pred_clobber ]
     [ &Upa    , Upl, w, w; yes                 ] <sve_int_op>\t%0.<Vetype>, %1/z, %3.<Vetype>, %4.<Vetype>
     [ ?Upl    , 0  , w, w; yes                 ] ^
     [ Upa     , Upl, w, w; no                  ] ^
  }
)

;; Predicated string matching in which both the flag and predicate results
;; are interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_cc"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (unspec:<VPRED>
                [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
                 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
                SVE2_MATCH)]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (set (match_operand:<VPRED> 0 "register_operand" "=Upa")
        (unspec:<VPRED>
          [(match_dup 6)
           (match_dup 7)
           (unspec:<VPRED>
             [(match_dup 2)
              (match_dup 3)]
             SVE2_MATCH)]
          UNSPEC_PRED_Z))]
  "TARGET_SVE2
   && TARGET_NON_STREAMING
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; Predicated string matching in which only the flags result is interesting.
(define_insn_and_rewrite "*aarch64_pred_<sve_int_op><mode>_ptest"
  [(set (reg:CC_NZC CC_REGNUM)
        (unspec:CC_NZC
          [(match_operand:VNx16BI 1 "register_operand" "Upl")
           (match_operand 4)
           (match_operand:SI 5 "aarch64_sve_ptrue_flag")
           (unspec:<VPRED>
             [(match_operand 6)
              (match_operand:SI 7 "aarch64_sve_ptrue_flag")
              (unspec:<VPRED>
                [(match_operand:SVE_FULL_BHI 2 "register_operand" "w")
                 (match_operand:SVE_FULL_BHI 3 "register_operand" "w")]
                SVE2_MATCH)]
             UNSPEC_PRED_Z)]
          UNSPEC_PTEST))
   (clobber (match_scratch:<VPRED> 0 "=Upa"))]
  "TARGET_SVE2
   && TARGET_NON_STREAMING
   && aarch64_sve_same_pred_for_ptest_p (&operands[4], &operands[6])"
  "<sve_int_op>\t%0.<Vetype>, %1/z, %2.<Vetype>, %3.<Vetype>"
  "&& !rtx_equal_p (operands[4], operands[6])"
  {
    operands[6] = copy_rtx (operands[4]);
    operands[7] = operands[5];
  }
)

;; -------------------------------------------------------------------------
;; ---- Table lookup
;; -------------------------------------------------------------------------
;; Includes:
;; - LUTI2
;; - LUTI4
;; -------------------------------------------------------------------------

(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
  [(set (match_operand:SVE_FULL_BH 0 "register_operand" "=w")
        (unspec:SVE_FULL_BH
          [(match_operand:SVE_FULL_BH 1 "register_operand" "w")
           (match_operand:VNx16QI 2 "register_operand" "w")
           (match_operand:DI 3 "const_int_operand")
           (const_int LUTI_BITS)]
          UNSPEC_LUTI))]
  "TARGET_LUT && TARGET_SVE2_OR_SME2"
  "luti<LUTI_BITS>\t%0.<Vetype>, { %1.<Vetype> }, %2[%3]"
)

(define_insn "@aarch64_sve_luti<LUTI_BITS><mode>"
  [(set (match_operand:<VSINGLE> 0 "register_operand" "=w")
        (unspec:<VSINGLE>
          [(match_operand:SVE_FULL_Hx2 1 "register_operand" "w")
           (match_operand:VNx16QI 2 "register_operand" "w")
           (match_operand:DI 3 "const_int_operand")
           (const_int LUTI_BITS)]
          UNSPEC_LUTI))]
  "TARGET_LUT && TARGET_SVE2_OR_SME2"
  "luti<LUTI_BITS>\t%0.<Vetype>, %1, %2[%3]"
)

;; =========================================================================
;; == Cryptographic extensions
;; =========================================================================

;; -------------------------------------------------------------------------
;; ---- Optional AES extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - AESD
;; - AESE
;; - AESIMC
;; - AESMC
;; -------------------------------------------------------------------------

;; AESD and AESE.
(define_insn "aarch64_sve2_aes<aes_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(xor:VNx16QI
             (match_operand:VNx16QI 1 "register_operand" "%0")
             (match_operand:VNx16QI 2 "register_operand" "w"))]
          CRYPTO_AES))]
  "TARGET_SVE2_AES"
  "aes<aes_op>\t%0.b, %0.b, %2.b"
  [(set_attr "type" "crypto_aese")]
)

;; AESMC and AESIMC.  These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_aes<aesmc_op>"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(match_operand:VNx16QI 1 "register_operand" "0")]
          CRYPTO_AESMC))]
  "TARGET_SVE2_AES"
  "aes<aesmc_op>\t%0.b, %0.b"
  [(set_attr "type" "crypto_aesmc")]
)

;; When AESE/AESMC and AESD/AESIMC fusion is enabled, we really want
;; to keep the two together and enforce the register dependency without
;; scheduling or register allocation messing up the order or introducing
;; moves in between.  Mash the two together during combine.

(define_insn "*aarch64_sve2_aese_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(unspec:VNx16QI
             [(xor:VNx16QI
                (match_operand:VNx16QI 1 "register_operand" "%0")
                (match_operand:VNx16QI 2 "register_operand" "w"))]
             UNSPEC_AESE)]
          UNSPEC_AESMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aese\t%0.b, %0.b, %2.b\;aesmc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)

(define_insn "*aarch64_sve2_aesd_fused"
  [(set (match_operand:VNx16QI 0 "register_operand" "=w")
        (unspec:VNx16QI
          [(unspec:VNx16QI
             [(xor:VNx16QI
                (match_operand:VNx16QI 1 "register_operand" "%0")
                (match_operand:VNx16QI 2 "register_operand" "w"))]
             UNSPEC_AESD)]
          UNSPEC_AESIMC))]
  "TARGET_SVE2_AES && aarch64_fusion_enabled_p (AARCH64_FUSE_AES_AESMC)"
  "aesd\t%0.b, %0.b, %2.b\;aesimc\t%0.b, %0.b"
  [(set_attr "type" "crypto_aese")
   (set_attr "length" "8")]
)
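
;; For example (illustrative only), the fused patterns above emit
;;   aese z0.b, z0.b, z1.b
;;   aesmc z0.b, z0.b
;; back to back as one 8-byte insn, so that cores which fuse AESE/AESMC
;; can treat the pair as a single macro-op.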

;; -------------------------------------------------------------------------
;; ---- Optional SHA-3 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - RAX1
;; -------------------------------------------------------------------------

(define_insn "aarch64_sve2_rax1"
  [(set (match_operand:VNx2DI 0 "register_operand" "=w")
        (xor:VNx2DI
          (rotate:VNx2DI
            (match_operand:VNx2DI 2 "register_operand" "w")
            (const_int 1))
          (match_operand:VNx2DI 1 "register_operand" "w")))]
  "TARGET_SVE2_SHA3"
  "rax1\t%0.d, %1.d, %2.d"
  [(set_attr "type" "crypto_sha3")]
)

;; -------------------------------------------------------------------------
;; ---- Optional SM4 extensions
;; -------------------------------------------------------------------------
;; Includes:
;; - SM4E
;; - SM4EKEY
;; -------------------------------------------------------------------------

;; These instructions do not take MOVPRFX.
(define_insn "aarch64_sve2_sm4e"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
        (unspec:VNx4SI
          [(match_operand:VNx4SI 1 "register_operand" "0")
           (match_operand:VNx4SI 2 "register_operand" "w")]
          UNSPEC_SM4E))]
  "TARGET_SVE2_SM4"
  "sm4e\t%0.s, %0.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)

(define_insn "aarch64_sve2_sm4ekey"
  [(set (match_operand:VNx4SI 0 "register_operand" "=w")
        (unspec:VNx4SI
          [(match_operand:VNx4SI 1 "register_operand" "w")
           (match_operand:VNx4SI 2 "register_operand" "w")]
          UNSPEC_SM4EKEY))]
  "TARGET_SVE2_SM4"
  "sm4ekey\t%0.s, %1.s, %2.s"
  [(set_attr "type" "crypto_sm4")]
)